{"id":"https://openalex.org/W4297841598","doi":"https://doi.org/10.21437/interspeech.2022-872","title":"Adaptive multilingual speech recognition with pretrained models","display_name":"Adaptive multilingual speech recognition with pretrained models","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841598","doi":"https://doi.org/10.21437/interspeech.2022-872"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-872","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-872","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004365148","display_name":"Ngoc-Quan Pham","orcid":null},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Ngoc-Quan Pham","raw_affiliation_strings":["Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany"],"affiliations":[{"raw_affiliation_string":"Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023053982","display_name":"Alexander Waibel","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Alexander Waibel","raw_affiliation_strings":["Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany","Carnegie Mellon University, Pittsburgh PA, USA"],"affiliations":[{"raw_affiliation_string":"Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046084081","display_name":"Jan Niehues","orcid":"https://orcid.org/0000-0002-4231-6543"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Niehues","raw_affiliation_strings":["Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany"],"affiliations":[{"raw_affiliation_string":"Interactive Systems Lab, Karlsruhe Institute of Technology, Karlsruhe, Germany","institution_ids":["https://openalex.org/I102335020"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5004365148"],"corresponding_institution_ids":["https://openalex.org/I102335020"],"apc_list":null,"apc_paid":null,"fwci":1.6631,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.86080586,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3879","last_page":"3883"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9668999910354614,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9668999910354614,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8119994401931763},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6808622479438782},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4921054244041443},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.484007328748703}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8119994401931763},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6808622479438782},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4921054244041443},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.484007328748703}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-872","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-872","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2092845679","https://openalex.org/W2327501763","https://openalex.org/W2888456631","https://openalex.org/W2896457183","https://openalex.org/W2936774411","https://openalex.org/W2962784628","https://openalex.org/W2962826786","https://openalex.org/W2963211188","https://openalex.org/W2964110616","https://openalex.org/W2964303773","https://openalex.org/W2970925270","https://openalex.org/W2971840980","https://openalex.org/W2972451902","https://openalex.org/W2973049979","https://openalex.org/W2981991061","https://openalex.org/W2997436923","https://openalex.org/W3005861412","https://openalex.org/W3015698636","https://openalex.org/W3015889230","https://openalex.org/W3030437843","https://openalex.org/W3034999214","https://openalex.org/W3036601975","https://openalex.org/W3095410713","https://openalex.org/W3096032230","https://openalex.org/W3097030750","https://openalex.org/W3097777922","https://openalex.org/W3107826490","https://openalex.org/W3169320628","https://openalex.org/W3172698324","https://openalex.org/W3173767661","https://openalex.org/W3197932033","https://openalex.org/W3200601846","https://openalex.org/W3213029956","https://openalex.org/W4287694131","https://openalex.org/W4302764113","https://openalex.org/W4308349017","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W3192589309"],"abstract_inverted_index":{"Multilingual":[0],"speech":[1],"recognition":[2,76],"with":[3,43,67],"supervised":[4,93],"learning":[5],"has":[6],"achieved":[7],"great":[8],"results":[9],"as":[10],"reflected":[11],"in":[12,40,104],"recent":[13],"research.With":[14],"the":[15,30,48,68,75,79,114,126],"development":[16],"of":[17,50],"pretraining":[18],"methods":[19],"on":[20,78],"audio":[21,61],"and":[22,62,84,95],"text":[23],"data,":[24],"it":[25],"is":[26],"imperative":[27],"to":[28,36,72,111,125],"transfer":[29],"knowledge":[31],"from":[32],"unsupervised":[33],"multilingual":[34],"models":[35,54],"facilitate":[37],"recognition,":[38],"especially":[39],"many":[41],"languages":[42],"limited":[44],"data.Our":[45],"work":[46],"investigated":[47],"effectiveness":[49],"using":[51],"two":[52,56],"pretrained":[53],"for":[55,60,64],"modalities:":[57],"wav2vec":[58],"2.0":[59],"MBART50":[63],"text,":[65],"together":[66],"adaptive":[69],"weight":[70],"techniques":[71],"massively":[73],"improve":[74],"quality":[77],"public":[80],"datasets":[81],"containing":[82],"CommonVoice":[83],"Europarl.Overall,":[85],"we":[86],"noticed":[87],"an":[88],"44%":[89],"improvement":[90],"over":[91],"purely":[92],"learning,":[94],"more":[96],"importantly,":[97],"each":[98],"technique":[99],"provides":[100],"a":[101],"different":[102,105],"reinforcement":[103],"languages.We":[106],"also":[107],"explore":[108],"other":[109],"possibilities":[110],"potentially":[112],"obtain":[113],"best":[115],"model":[116],"by":[117],"slightly":[118],"adding":[119],"either":[120],"depth":[121],"or":[122],"relative":[123],"attention":[124],"architecture.":[127]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
