{"id":"https://openalex.org/W4297841533","doi":"https://doi.org/10.21437/interspeech.2022-11039","title":"Improving Mispronunciation Detection with Wav2vec2-based Momentum Pseudo-Labeling for Accentedness and Intelligibility Assessment","display_name":"Improving Mispronunciation Detection with Wav2vec2-based Momentum Pseudo-Labeling for Accentedness and Intelligibility Assessment","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4297841533","doi":"https://doi.org/10.21437/interspeech.2022-11039"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-11039","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11039","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100542450","display_name":"Yang Mu","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mu Yang","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050069117","display_name":"Kevin Hirschi","orcid":"https://orcid.org/0000-0002-0838-3494"},"institutions":[{"id":"https://openalex.org/I203172682","display_name":"Northern Arizona University","ror":"https://ror.org/0272j5188","country_code":"US","type":"education","lineage":["https://openalex.org/I203172682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Hirschi","raw_affiliation_strings":["Northern Arizona University, Flagstaff, AZ, USA"],"affiliations":[{"raw_affiliation_string":"Northern Arizona University, Flagstaff, AZ, USA","institution_ids":["https://openalex.org/I203172682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013317158","display_name":"Stephen Daniel Looney","orcid":"https://orcid.org/0000-0001-9968-7276"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen Daniel Looney","raw_affiliation_strings":["Pennsylvania State University, State College, PA, USA"],"affiliations":[{"raw_affiliation_string":"Pennsylvania State University, State College, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019197893","display_name":"Okim Kang","orcid":"https://orcid.org/0000-0002-7721-5283"},"institutions":[{"id":"https://openalex.org/I203172682","display_name":"Northern Arizona University","ror":"https://ror.org/0272j5188","country_code":"US","type":"education","lineage":["https://openalex.org/I203172682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Okim Kang","raw_affiliation_strings":["Northern Arizona University, Flagstaff, AZ, USA"],"affiliations":[{"raw_affiliation_string":"Northern Arizona University, Flagstaff, AZ, USA","institution_ids":["https://openalex.org/I203172682"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057910370","display_name":"John H. L. Hansen","orcid":"https://orcid.org/0000-0003-1382-9929"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John H.L. Hansen","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100542450"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":0.8399,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.74682001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"4481","last_page":"4485"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8881000280380249,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8881000280380249,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7639999985694885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5695968866348267},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.5611650347709656},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48212695121765137},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33863991498947144},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06705713272094727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5695968866348267},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.5611650347709656},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48212695121765137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33863991498947144},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06705713272094727},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-11039","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11039","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5299999713897705,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310831","display_name":"Northern Arizona University","ror":"https://ror.org/0272j5188"},{"id":"https://openalex.org/F4320327708","display_name":"University of Texas at Dallas","ror":"https://ror.org/049emcs32"},{"id":"https://openalex.org/F4320332169","display_name":"Directorate for Computer and Information Science and Engineering","ror":"https://ror.org/025kzpk63"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2091856355","https://openalex.org/W2401896499","https://openalex.org/W2407080277","https://openalex.org/W2552635739","https://openalex.org/W2888954148","https://openalex.org/W2889488531","https://openalex.org/W2896457183","https://openalex.org/W2972347929","https://openalex.org/W2979826702","https://openalex.org/W3015231007","https://openalex.org/W3015522062","https://openalex.org/W3026041220","https://openalex.org/W3034238904","https://openalex.org/W3036601975","https://openalex.org/W3096338464","https://openalex.org/W3097515180","https://openalex.org/W3097911750","https://openalex.org/W3160525311","https://openalex.org/W3161627112","https://openalex.org/W3167533889","https://openalex.org/W3169320628","https://openalex.org/W3196525293","https://openalex.org/W3197816268","https://openalex.org/W3198712976","https://openalex.org/W3205234329","https://openalex.org/W3207925072","https://openalex.org/W3209984917","https://openalex.org/W4286974574","https://openalex.org/W4287118210","https://openalex.org/W4287374065","https://openalex.org/W4287553982","https://openalex.org/W4375869379"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W2127461790","https://openalex.org/W2069324367"],"abstract_inverted_index":{"Current":[0],"leading":[1],"mispronunciation":[2],"detection":[3],"and":[4,42,62,82,117,152,183],"diagnosis":[5],"(MDD)":[6],"systems":[7],"achieve":[8],"promising":[9],"performance":[10],"via":[11,37],"end-to-end":[12,18],"phoneme":[13,113],"recognition.One":[14],"challenge":[15],"of":[16,23,88],"such":[17],"solutions":[19],"is":[20,98,131],"the":[21,44,72,89,141],"scarcity":[22],"human-annotated":[24],"phonemes":[25],"on":[26,48,163,181],"natural":[27],"L2":[28,35,68,75],"speech.In":[29],"this":[30],"work,":[31],"we":[32,54,158],"leverage":[33],"unlabeled":[34],"speech":[36,69,76],"a":[38,111,124,149,164,174],"pseudo-labeling":[39],"(PL)":[40],"procedure":[41],"extend":[43],"fine-tuning":[45,106],"approach":[46],"based":[47,180],"pre-trained":[49],"self-supervised":[50],"learning":[51],"(SSL)":[52],"models.Specifically,":[53],"use":[55],"Wav2vec":[56],"2.0":[57],"as":[58],"our":[59,96,145,169],"SSL":[60],"model,":[61],"fine-tune":[63],"it":[64],"using":[65],"original":[66],"labeled":[67],"samples":[70],"plus":[71],"created":[73],"pseudo-labeled":[74],"samples.Our":[77],"pseudo":[78,101,108],"labels":[79,109],"are":[80,83],"dynamic":[81],"produced":[84],"by":[85],"an":[86,160],"ensemble":[87],"online":[90],"model":[91,97],"on-the-fly,":[92],"which":[93],"ensures":[94],"that":[95,105],"robust":[99],"to":[100,134,140],"label":[102],"noise.We":[103],"show":[104,173],"with":[107,177],"achieves":[110],"5.35%":[112],"error":[114,155],"rate":[115],"reduction":[116],"2.48%":[118],"MDD":[119,143,146],"F1":[120],"score":[121],"improvement":[122],"over":[123],"labeled-samples-only":[125],"finetuning":[126],"baseline.The":[127],"proposed":[128],"PL":[129,138],"method":[130],"also":[132],"shown":[133],"outperform":[135],"conventional":[136],"offline":[137],"methods.Compared":[139],"state-of-the-art":[142],"systems,":[144],"solution":[147],"produces":[148],"more":[150],"accurate":[151],"consistent":[153],"phonetic":[154],"diagnosis.In":[156],"addition,":[157],"conduct":[159],"open":[161],"test":[162],"separate":[165],"UTD-4Accents":[166],"dataset,":[167],"where":[168],"system":[170],"recognition":[171],"outputs":[172],"strong":[175],"correlation":[176],"human":[178],"perception,":[179],"accentedness":[182],"intelligibility.":[184]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-08T06:56:09.383167","created_date":"2025-10-10T00:00:00"}
