{"id":"https://openalex.org/W4224916964","doi":"https://doi.org/10.1109/icassp43922.2022.9746720","title":"A Model for Assessor Bias in Automatic Pronunciation Assessment","display_name":"A Model for Assessor Bias in Automatic Pronunciation Assessment","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224916964","doi":"https://doi.org/10.1109/icassp43922.2022.9746720"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746720","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057148234","display_name":"Jose Antonio Lopez Saenz","orcid":"https://orcid.org/0000-0002-8779-5947"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jose Antonio Lopez Saenz","raw_affiliation_strings":["University of Sheffield,Speech and Hearing Research, Department of Computer Science,UK","Speech and Hearing Research, Department of Computer Science, University of Sheffield, UK"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Speech and Hearing Research, Department of Computer Science,UK","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Speech and Hearing Research, Department of Computer Science, University of Sheffield, UK","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030528300","display_name":"Thomas Hain","orcid":"https://orcid.org/0000-0003-0939-3464"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Thomas Hain","raw_affiliation_strings":["University of Sheffield,Speech and Hearing Research, Department of Computer Science,UK","Speech and Hearing Research, Department of Computer Science, University of Sheffield, UK"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Speech and Hearing Research, Department of Computer Science,UK","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Speech and Hearing Research, Department of Computer Science, University of Sheffield, UK","institution_ids":["https://openalex.org/I91136226"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5057148234"],"corresponding_institution_ids":["https://openalex.org/I91136226"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02009373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7267","last_page":"7271"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.978600025177002,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7464127540588379},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.7317672967910767},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.588284969329834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4814012050628662},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37341850996017456},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36969101428985596},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.22093147039413452}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7464127540588379},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.7317672967910767},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.588284969329834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4814012050628662},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37341850996017456},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36969101428985596},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.22093147039413452},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746720","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746720","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7300000190734863}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320342","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1505400583","https://openalex.org/W2075748225","https://openalex.org/W2130722890","https://openalex.org/W2132596650","https://openalex.org/W2133564696","https://openalex.org/W2138519554","https://openalex.org/W2194775991","https://openalex.org/W2293634267","https://openalex.org/W2777448780","https://openalex.org/W2805034849","https://openalex.org/W2972347929","https://openalex.org/W2974489302","https://openalex.org/W3008554267","https://openalex.org/W3096674206","https://openalex.org/W4210455232","https://openalex.org/W4385245566","https://openalex.org/W6679434410","https://openalex.org/W6739901393","https://openalex.org/W6767101315","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W2183593636","https://openalex.org/W2350724007","https://openalex.org/W2355751417","https://openalex.org/W2423284978","https://openalex.org/W2083922162","https://openalex.org/W2000075989","https://openalex.org/W4220683390","https://openalex.org/W2776838583","https://openalex.org/W2359469050","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0],"pronunciation":[1,11,23,44],"assessment,":[2],"the":[3,36,47,66,132,149,158,162,166,178],"assessor\u2019s":[4],"perception":[5],"is":[6],"influenced":[7],"by":[8,116],"a":[9,17,41,76],"particular":[10],"template.":[12],"This":[13,38],"assessor":[14,51,56,67,133],"may":[15,32],"hold":[16],"bias":[18],"towards":[19],"certain":[20],"variations":[21],"in":[22],"which":[24,111],"do":[25],"not":[26],"necessarily":[27],"impact":[28],"communication,":[29],"yet":[30],"they":[31],"be":[33,63],"penalized":[34],"during":[35],"assessment.":[37],"work":[39],"proposes":[40],"model":[42,78,168],"for":[43,114,143],"assessment":[45,71],"as":[46,65,75,108],"combination":[48],"of":[49,106,138,165],"an":[50,55],"independent":[52],"(A)":[53],"and":[54,90,141,152,155],"specific":[57],"(B)":[58],"component.":[59],"The":[60,69,85,123],"latter":[61],"could":[62],"interpreted":[64],"bias.":[68,179],"resulting":[70],"function":[72],"was":[73,98],"implemented":[74],"dual":[77],"trained":[79,118],"to":[80,128,171,177],"detect":[81,129],"mispronounced":[82],"speech":[83],"segments.":[84],"models":[86,124],"incorporate":[87],"Long-Short":[88],"Memory":[89],"saliency":[91],"region":[92],"selection":[93],"using":[94,100],"attention.":[95],"An":[96],"experiment":[97],"performed":[99],"recordings":[101],"from":[102],"young":[103],"Dutch":[104],"learners":[105],"English":[107],"second":[109],"language,":[110],"were":[112,126,169],"annotated":[113],"mispronunciation":[115],"three":[117],"phoneticians":[119],"(a1,":[120],"a2,":[121,145],"a3).":[122],"combined":[125],"able":[127,170],"mispronunciations":[130],"given":[131],"identity":[134],"achieving":[135],"F1":[136],"scores":[137],"0.77,":[139],"0.68":[140],"0.86":[142],"a1,":[144],"a3":[146],"respectively":[147],"on":[148,157],"Train":[150],"set":[151],"0.66,":[153],"0.53":[154],"0.81":[156],"Test":[159],"set.":[160],"Additionally,":[161],"attention":[163],"weights":[164],"B":[167],"illustrate":[172],"disagreements":[173],"between":[174],"assessors":[175],"related":[176]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}