{"id":"https://openalex.org/W4416037049","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.134","title":"Read to Hear: A Zero-Shot Pronunciation Assessment Using Textual Descriptions and LLMs","display_name":"Read to Hear: A Zero-Shot Pronunciation Assessment Using Textual Descriptions and LLMs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037049","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.134"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.134","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.134","pdf_url":"https://aclanthology.org/2025.emnlp-main.134.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.134.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100428120","display_name":"Yu-Wen Chen","orcid":"https://orcid.org/0000-0002-7084-7632"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]},{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yu-Wen Chen","raw_affiliation_strings":["Department of Computer Science , Columbia University , United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science , Columbia University , United States","institution_ids":["https://openalex.org/I76835614","https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077512292","display_name":"Meng Ma","orcid":"https://orcid.org/0000-0001-6514-5495"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]},{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Melody Ma","raw_affiliation_strings":["Department of Computer Science , Columbia University , United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science , Columbia University , United States","institution_ids":["https://openalex.org/I76835614","https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045037642","display_name":"Julia Hirschberg","orcid":"https://orcid.org/0000-0003-0689-7616"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]},{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Julia Hirschberg","raw_affiliation_strings":["Department of Computer Science , Columbia University , United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science , Columbia University , United States","institution_ids":["https://openalex.org/I76835614","https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100428120"],"corresponding_institution_ids":["https://openalex.org/I76835614","https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18561379,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2682","last_page":"2694"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.14980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.14980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.13689999282360077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.11169999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.6560999751091003},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.2515999972820282},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.23190000653266907},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.21930000185966492}],"concepts":[{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.6560999751091003},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6349999904632568},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5040000081062317},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46790000796318054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.445499986410141},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.3386000096797943},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.27379998564720154},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.2515999972820282},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.23190000653266907},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.21930000185966492}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.134","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.134","pdf_url":"https://aclanthology.org/2025.emnlp-main.134.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.134","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.134","pdf_url":"https://aclanthology.org/2025.emnlp-main.134.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037049.pdf","grobid_xml":"https://content.openalex.org/works/W4416037049.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"pronunciation":[1,46,75,107,121],"assessment":[2],"is":[3,93,132],"typically":[4],"performed":[5],"by":[6,151],"acoustic":[7],"models":[8,32,147],"trained":[9],"on":[10,111,148],"audio-score":[11],"pairs.Although":[12],"effective,":[13],"these":[14],"systems":[15],"provide":[16],"only":[17],"numerical":[18],"scores,":[19],"without":[20],"the":[21,84,97,119,142],"information":[22],"needed":[23],"to":[24,73,95],"help":[25],"learners":[26],"understand":[27],"their":[28,42],"errors.Meanwhile,":[29],"large":[30],"language":[31,39],"(LLMs)":[33],"have":[34],"proven":[35],"effective":[36],"in":[37,124,137],"supporting":[38],"learning,":[40],"but":[41],"potential":[43],"for":[44,106],"assessing":[45],"remains":[47],"unexplored.In":[48],"this":[49],"work,":[50],"we":[51,117],"introduce":[52],"TextPA,":[53],"a":[54,87,102,153],"zero-shot,":[55],"Textual":[56],"description-based":[57],"Pronunciation":[58],"Assessment":[59],"approach.TextPA":[60],"utilizes":[61],"human-readable":[62],"representations":[63],"of":[64,109,144],"speech":[65],"signals,":[66],"which":[67],"are":[68],"fed":[69],"into":[70],"an":[71],"LLM":[72],"assess":[74],"accuracy":[76,98],"and":[77,135],"fluency,":[78],"while":[79],"also":[80],"providing":[81],"reasoning":[82],"behind":[83],"assigned":[85],"scores.Finally,":[86],"phoneme":[88],"sequence":[89],"match":[90],"scoring":[91],"method":[92],"used":[94],"refine":[96],"scores.Our":[99],"work":[100],"highlights":[101],"previously":[103],"overlooked":[104],"direction":[105],"assessment.Instead":[108],"relying":[110],"supervised":[112],"training":[113],"with":[114],"audioscore":[115],"examples,":[116],"exploit":[118],"rich":[120],"knowledge":[122],"embedded":[123],"written":[125],"text.Experimental":[126],"results":[127],"show":[128],"that":[129],"our":[130],"approach":[131],"both":[133],"cost-efficient":[134],"competitive":[136],"performance.Furthermore,":[138],"TextPA":[139],"significantly":[140],"improves":[141],"performance":[143],"conventional":[145],"audioscore-trained":[146],"out-of-domain":[149],"data":[150],"offering":[152],"complementary":[154],"perspective.":[155]},"counts_by_year":[],"updated_date":"2026-03-10T14:07:55.174380","created_date":"2025-11-08T00:00:00"}
