{"id":"https://openalex.org/W7148313864","doi":"https://doi.org/10.1109/asru65441.2025.11434724","title":"Can self-supervised speech models predict the perceived acceptability of prosodic variation?","display_name":"Can self-supervised speech models predict the perceived acceptability of prosodic variation?","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148313864","doi":"https://doi.org/10.1109/asru65441.2025.11434724"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434724","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042248416","display_name":"Sarenne Wallbridge","orcid":"https://orcid.org/0000-0002-1401-4492"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Sarenne Wallbridge","raw_affiliation_strings":["University of Edinburgh,Centre for Speech Technology Research,UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,Centre for Speech Technology Research,UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053310461","display_name":"Adaeze Adigwe","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Adaeze Adigwe","raw_affiliation_strings":["University of Edinburgh,Centre for Speech Technology Research,UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,Centre for Speech Technology Research,UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081337679","display_name":"Peter Bell","orcid":"https://orcid.org/0000-0003-4415-7408"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Bell","raw_affiliation_strings":["University of Edinburgh,Centre for Speech Technology Research,UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh,Centre for Speech Technology Research,UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042248416"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81893012,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.8812000155448914,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.8812000155448914,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.011300000362098217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11640","display_name":"Linguistic Variation and Morphology","score":0.011300000362098217,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.34290000796318054},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.28209999203681946},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.26420000195503235},{"id":"https://openalex.org/keywords/speech-production","display_name":"Speech production","score":0.26420000195503235}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4643000066280365},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44179999828338623},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.41530001163482666},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27950000762939453},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.27059999108314514},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26420000195503235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434724","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.5405289530754089}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W2047146080","https://openalex.org/W2058373514","https://openalex.org/W2108862644","https://openalex.org/W2239141610","https://openalex.org/W2405103227","https://openalex.org/W2406225780","https://openalex.org/W2546732126","https://openalex.org/W2609635007","https://openalex.org/W2725697804","https://openalex.org/W2763110165","https://openalex.org/W2795342569","https://openalex.org/W2962784628","https://openalex.org/W2963979492","https://openalex.org/W2967220154","https://openalex.org/W2974597461","https://openalex.org/W3011535310","https://openalex.org/W3024308166","https://openalex.org/W3026278778","https://openalex.org/W3169633354","https://openalex.org/W3193768936","https://openalex.org/W3196225973","https://openalex.org/W3196231874","https://openalex.org/W3197580070","https://openalex.org/W3198270377","https://openalex.org/W3202278141","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4296068974","https://openalex.org/W4319862652","https://openalex.org/W4372260534","https://openalex.org/W4385245566","https://openalex.org/W4385822336","https://openalex.org/W4385822628","https://openalex.org/W4387849500","https://openalex.org/W4389777835","https://openalex.org/W4393859250","https://openalex.org/W4395699642","https://openalex.org/W4399888815","https://openalex.org/W4401609230","https://openalex.org/W4402111696","https://openalex.org/W4402112013","https://openalex.org/W4402112371","https://openalex.org/W4406461503","https://openalex.org/W4411119792","https://openalex.org/W4415433140","https://openalex.org/W7133228923"],"related_works":[],"abstract_inverted_index":{"Though":[0],"producing":[1],"an":[2,106],"appropriate":[3],"prosodic":[4,58,113],"realisation":[5],"of":[6,52,64,82,112,118],"text":[7],"is":[8,103],"a":[9,61,87],"one-to-many":[10],"problem,":[11],"modern":[12],"speech":[13,96],"generation":[14],"often":[15],"focuses":[16],"on":[17],"identifying":[18],"the":[19,110,116],"\u201cbest\u201d":[20],"or":[21],"\u201cmost":[22],"likely\u201d":[23],"output,":[24],"overlooking":[25],"acceptable":[26,57],"variation":[27],"across":[28,68],"realisations.":[29],"How":[30],"listeners":[31],"perceive":[32],"such":[33,83],"variation\u2013and":[34],"whether":[35,53,100],"models":[36,55],"capture":[37],"it\u2013is":[38],"unaccounted":[39],"for":[40,90],"in":[41,105],"current":[42],"evaluation":[43,121],"paradigms.":[44,122],"In":[45],"this":[46,101],"study,":[47],"we":[48,74,98],"present":[49],"exploratory":[50],"analyses":[51],"self-supervised":[54],"encode":[56],"variation.":[59],"Using":[60],"new":[62],"dataset":[63],"relative":[65],"acceptability":[66],"ratings":[67],"carefully":[69],"controlled,":[70],"high-quality":[71],"synthetic":[72],"utterances,":[73],"show":[75],"that":[76],"SSL":[77],"representations":[78],"contain":[79],"information":[80,102],"predictive":[81],"judgments.":[84],"By":[85],"introducing":[86],"novel":[88],"method":[89],"deriving":[91],"probability-based":[92],"uncertainty":[93],"from":[94],"autoregressive":[95],"models,":[97],"examine":[99],"available":[104],"unsupervised":[107],"setting,":[108],"highlighting":[109],"complexity":[111],"perception":[114],"and":[115],"value":[117],"more":[119],"human-centric":[120]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
