{"id":"https://openalex.org/W4281770669","doi":"https://doi.org/10.21437/interspeech.2022-923","title":"LAE: Language-Aware Encoder for Monolingual and Multilingual ASR","display_name":"LAE: Language-Aware Encoder for Monolingual and Multilingual ASR","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4281770669","doi":"https://doi.org/10.21437/interspeech.2022-923"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-923","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-923","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068192693","display_name":"Jinchuan Tian","orcid":"https://orcid.org/0000-0002-2129-471X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jinchuan Tian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102286496","display_name":"Jianwei Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianwei Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458065","display_name":"Chunlei Zhang","orcid":"https://orcid.org/0000-0002-6253-2446"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chunlei Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002795838","display_name":"Yuexian Zou","orcid":"https://orcid.org/0000-0001-9999-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuexian Zou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068192693"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8698,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.87545788,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3178","last_page":"3182"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8586263656616211},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7361059188842773},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6130878925323486},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4687854051589966},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4563705027103424},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.045776188373565674}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8586263656616211},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7361059188842773},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6130878925323486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4687854051589966},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4563705027103424},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.045776188373565674}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-923","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-923","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W3192589309"],"abstract_inverted_index":{"Despite":[0],"the":[1,89,91,97,101,126,139,169],"rapid":[2],"progress":[3],"in":[4,129,148],"automatic":[5],"speech":[6,12,25,35,39,136],"recognition":[7,26],"(ASR)":[8],"research,":[9],"recognizing":[10,32,37],"multilingual":[11,24,52,158],"using":[13],"a":[14,46,50,65,118,163],"unified":[15],"ASR":[16,159],"system":[17],"remains":[18],"highly":[19],"challenging.":[20],"Previous":[21],"works":[22],"on":[23,29,133,154,176],"mainly":[27],"focus":[28],"two":[30],"directions:":[31],"multiple":[33],"monolingual":[34,156],"or":[36,165],"code-switched":[38,135,167],"that":[40,138],"uses":[41],"different":[42,146],"languages":[43,147],"interchangeably":[44],"within":[45],"single":[47],"utterance.":[48],"However,":[49],"pragmatic":[51],"recognizer":[53],"is":[54,71,94,122,142,184],"expected":[55],"to":[56,73,106,124],"be":[57],"compatible":[58],"with":[59],"both":[60,75,155,177],"directions.":[61],"In":[62,88],"this":[63],"work,":[64],"novel":[66],"language-aware":[67,84,119],"encoder":[68],"(LAE)":[69],"architecture":[70],"proposed":[72,123,140,170],"handle":[74],"situations":[76],"by":[77,96],"disentangling":[78],"language-specific":[79,102,115,127],"information":[80,116],"and":[81,150,157,179],"generating":[82],"frame-level":[83,149],"representations":[85,109],"during":[86],"encoding.":[87],"LAE,":[90],"primary":[92],"encoding":[93],"implemented":[95],"shared":[98],"block":[99],"while":[100],"blocks":[103,128],"are":[104],"used":[105],"extract":[107],"specific":[108],"for":[110],"each":[111],"language.":[112],"To":[113],"learn":[114],"discriminatively,":[117],"training":[120],"method":[121],"optimize":[125],"LAE.":[130],"Experiments":[131],"conducted":[132],"Mandarin-English":[134],"suggest":[137],"LAE":[141,171],"capable":[143],"of":[144],"discriminating":[145],"shows":[151],"superior":[152],"performance":[153],"tasks.":[160],"With":[161],"either":[162],"real-recorded":[164],"simulated":[166],"dataset,":[168],"achieves":[172],"statistically":[173],"significant":[174],"improvements":[175],"CTC":[178],"neural":[180],"transducer":[181],"systems.":[182],"Code":[183],"released":[185]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
