{"id":"https://openalex.org/W4225289150","doi":"https://doi.org/10.1109/icassp43922.2022.9747101","title":"Improving End-to-End Contextual Speech Recognition with Fine-Grained Contextual Knowledge Selection","display_name":"Improving End-to-End Contextual Speech Recognition with Fine-Grained Contextual Knowledge Selection","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225289150","doi":"https://doi.org/10.1109/icassp43922.2022.9747101"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747101","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747101","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027728339","display_name":"Minglun Han","orcid":"https://orcid.org/0000-0002-5120-069X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Minglun Han","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation","Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences","Bytedance AI Lab"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Bytedance AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000236213","display_name":"Linhao Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Linhao Dong","raw_affiliation_strings":["Bytedance AI Lab"],"affiliations":[{"raw_affiliation_string":"Bytedance AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101894661","display_name":"Zhenlin Liang","orcid":"https://orcid.org/0000-0003-4269-4146"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhenlin Liang","raw_affiliation_strings":["Bytedance AI Lab"],"affiliations":[{"raw_affiliation_string":"Bytedance AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052057753","display_name":"Meng Cai","orcid":"https://orcid.org/0000-0002-0711-5949"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng Cai","raw_affiliation_strings":["Bytedance AI Lab"],"affiliations":[{"raw_affiliation_string":"Bytedance AI Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101400153","display_name":"Shiyu Zhou","orcid":"https://orcid.org/0000-0002-6889-0316"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyu Zhou","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110851569","display_name":"Zejun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zejun Ma","raw_affiliation_strings":["Bytedance AI Lab"],"affiliations":[{"raw_affiliation_string":"Bytedance AI Lab","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108642431","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-1111-1529"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation","School of Artificial Intelligence, University of Chinese Academy of Sciences","Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5027728339"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.6079,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.91446741,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8532","last_page":"8536"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7999404668807983},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7701210975646973},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.7059586048126221},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5870082378387451},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5423184037208557},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5088820457458496},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.48770102858543396},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4789979159832001},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4376462996006012},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.4332396984100342},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4133397936820984}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7999404668807983},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7701210975646973},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.7059586048126221},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5870082378387451},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5423184037208557},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5088820457458496},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.48770102858543396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4789979159832001},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4376462996006012},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.4332396984100342},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4133397936820984},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747101","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747101","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6899999976158142,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2327501763","https://openalex.org/W2395440424","https://openalex.org/W2403440562","https://openalex.org/W2407080277","https://openalex.org/W2886319145","https://openalex.org/W2889012072","https://openalex.org/W2892009249","https://openalex.org/W2898599211","https://openalex.org/W2936774411","https://openalex.org/W2937402758","https://openalex.org/W2962760690","https://openalex.org/W2962784628","https://openalex.org/W2962826786","https://openalex.org/W2972625221","https://openalex.org/W2973172693","https://openalex.org/W3015995734","https://openalex.org/W3016167541","https://openalex.org/W3097075707","https://openalex.org/W3097777922","https://openalex.org/W3097794466","https://openalex.org/W3140235797","https://openalex.org/W3160413945","https://openalex.org/W3198004110","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6629717138","https://openalex.org/W6638749077","https://openalex.org/W6675365184","https://openalex.org/W6711783022","https://openalex.org/W6713395095","https://openalex.org/W6713762819","https://openalex.org/W6739901393","https://openalex.org/W6754299077","https://openalex.org/W6767621752","https://openalex.org/W6778267192","https://openalex.org/W6779011057","https://openalex.org/W6787694064"],"related_works":["https://openalex.org/W2970530566","https://openalex.org/W2039546652","https://openalex.org/W2967478618","https://openalex.org/W2997152889","https://openalex.org/W4385572700","https://openalex.org/W4388335561","https://openalex.org/W4307309205","https://openalex.org/W4288261899","https://openalex.org/W4385009901","https://openalex.org/W4285141722"],"abstract_inverted_index":{"Nowadays,":[0],"most":[1,107,158],"methods":[2,19,142,154],"for":[3],"end-to-end":[4],"contextual":[5,13,17,23,57,117],"speech":[6],"recognition":[7,10],"bias":[8],"the":[9,33,43,69,82,93,96,103,140,175],"process":[11],"towards":[12],"knowledge.":[14],"Since":[15],"all-neural":[16,146],"biasing":[18,147],"rely":[20],"on":[21,51,92,144,165,174],"phrase-level":[22,116],"modeling":[24],"and":[25,87,119,133,167],"attention-based":[26],"relevance":[27],"modeling,":[28],"they":[29],"may":[30],"suffer":[31],"from":[32],"confusion":[34,53],"between":[35],"similar":[36],"context-specific":[37],"phrases,":[38],"which":[39],"hurts":[40],"predictions":[41],"at":[42,157],"token":[44,72,90],"level.":[45],"In":[46,61],"this":[47],"work,":[48],"we":[49,63,75,101,138],"focus":[50],"mitigating":[52],"problems":[54],"with":[55],"fine-grained":[56,65],"knowledge":[58,66],"selection":[59,79],"(FineCoS).":[60],"FineCoS,":[62],"introduce":[64],"to":[67,80,112,123],"reduce":[68],"uncertainty":[70],"of":[71,84,106],"predictions.":[73],"Specifically,":[74],"first":[76],"apply":[77],"phrase":[78,85,98],"narrow":[81],"range":[83],"candidates,":[86],"then":[88],"conduct":[89],"attention":[91,104],"tokens":[94],"in":[95,110],"selected":[97],"candidates.":[99],"Moreover,":[100],"re-normalize":[102],"weights":[105],"relevant":[108],"phrases":[109,128],"inference":[111],"obtain":[113],"more":[114],"focused":[115],"representations,":[118],"inject":[120],"position":[121],"information":[122],"help":[124],"model":[125],"better":[126],"discriminate":[127],"or":[129],"tokens.":[130],"On":[131],"LibriSpeech":[132,166],"an":[134,145],"in-house":[135,176],"160,000-hour":[136],"dataset,":[137],"explore":[139],"proposed":[141,153],"based":[143],"method,":[148],"collaborative":[149],"decoding":[150],"(ColDec).":[151],"The":[152],"further":[155],"bring":[156],"6.1%":[159],"relative":[160,169],"word":[161],"error":[162,171],"rate":[163,172],"reduction":[164,173],"16.4%":[168],"character":[170],"dataset.":[177]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
