{"id":"https://openalex.org/W3194701944","doi":"https://doi.org/10.21437/interspeech.2021-57","title":"Cross-Domain Speech Recognition with Unsupervised Character-Level Distribution Matching","display_name":"Cross-Domain Speech Recognition with Unsupervised Character-Level Distribution Matching","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3194701944","doi":"https://doi.org/10.21437/interspeech.2021-57","mag":"3194701944"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-57","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-57","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027365025","display_name":"Wenxin Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wenxin Hou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100700956","display_name":"Jindong Wang","orcid":"https://orcid.org/0000-0002-4833-0880"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jindong Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101522530","display_name":"Xu Tan","orcid":"https://orcid.org/0000-0001-5631-0639"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu Tan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020025718","display_name":"Tao Qin","orcid":"https://orcid.org/0000-0002-9095-0776"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Qin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103015161","display_name":"Takahiro Shinozaki","orcid":"https://orcid.org/0000-0001-8114-8450"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takahiro Shinozaki","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5027365025"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6799,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87000014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3425","last_page":"3429"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6665344834327698},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6317893266677856},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5859560966491699},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5538762211799622},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5367705821990967},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4328199028968811},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39317020773887634},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36240512132644653},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1450406014919281},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12181240320205688}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6665344834327698},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6317893266677856},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5859560966491699},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5538762211799622},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5367705821990967},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4328199028968811},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39317020773887634},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36240512132644653},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1450406014919281},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12181240320205688},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2021-57","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-57","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2789919619","https://openalex.org/W2293457016","https://openalex.org/W3169305685","https://openalex.org/W2033914206","https://openalex.org/W2146076056","https://openalex.org/W2351428524","https://openalex.org/W2163831990","https://openalex.org/W2368779261","https://openalex.org/W3003836766","https://openalex.org/W1551406738"],"abstract_inverted_index":{"End-to-end":[0],"automatic":[1],"speech":[2],"recognition":[3,31],"(ASR)":[4],"can":[5],"achieve":[6,75],"promising":[7],"performance":[8],"with":[9],"large-scale":[10],"training":[11,21],"data.":[12],"However,":[13],"it":[14],"is":[15],"known":[16],"that":[17,111],"domain":[18,41],"mismatch":[19],"between":[20,57],"and":[22,45,117,128,142],"testing":[23],"data":[24],"often":[25],"leads":[26],"to":[27,53,64,71],"a":[28,48],"degradation":[29],"of":[30],"accuracy.":[32],"In":[33],"this":[34],"work,":[35],"we":[36,74,88],"focus":[37],"on":[38,106,125],"the":[39,68,80,90,102,107,135],"unsupervised":[40],"adaptation":[42,56],"for":[43,67,138],"ASR":[44],"propose":[46],"CMatch,":[47],"Character-level":[49],"distribution":[50],"matching":[51],"method":[52],"perform":[54],"fine-grained":[55],"each":[58,72],"character":[59],"in":[60],"two":[61],"domains.":[62],"First,":[63],"obtain":[65],"labels":[66],"features":[69],"belonging":[70],"character,":[73],"frame-level":[76,139],"label":[77,140],"assignment":[78,141],"using":[79,93,101],"Connectionist":[81],"Temporal":[82],"Classification":[83],"(CTC)":[84],"pseudo":[85],"labels.":[86],"Then,":[87],"match":[89],"character-level":[91],"distributions":[92],"Maximum":[94],"Mean":[95],"Discrepancy.":[96],"We":[97,131],"train":[98],"our":[99,112],"algorithm":[100],"self-training":[103],"technique.":[104],"Experiments":[105],"Libri-Adapt":[108],"dataset":[109],"show":[110],"proposed":[113],"approach":[114],"achieves":[115],"14.39%":[116],"16.50%":[118],"relative":[119],"Word":[120],"Error":[121],"Rate":[122],"(WER)":[123],"reduction":[124],"both":[126],"cross-device":[127],"cross-environment":[129],"ASR.":[130],"also":[132],"comprehensively":[133],"analyze":[134],"different":[136],"strategies":[137],"Transformer":[143],"adaptations.":[144]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
