{"id":"https://openalex.org/W7148534773","doi":"https://doi.org/10.1109/asru65441.2025.11434710","title":"Emphasis Sensitivity in Speech Representations","display_name":"Emphasis Sensitivity in Speech Representations","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148534773","doi":"https://doi.org/10.1109/asru65441.2025.11434710"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434710","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shaun Rafael Cassini","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shaun Rafael Cassini","raw_affiliation_strings":["University of Sheffield,Sheffield,UK"],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Thomas Hain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas Hain","raw_affiliation_strings":["University of Sheffield,Sheffield,UK"],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Anton Ragni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anton Ragni","raw_affiliation_strings":["University of Sheffield,Sheffield,UK"],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.82010541,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.33070001006126404,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.33070001006126404,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.06530000269412994,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10034","display_name":"Syntax, Semantics, Linguistic Variation","score":0.04729999974370003,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/emphasis","display_name":"Emphasis (telecommunications)","score":0.857200026512146},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.57669997215271},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5667999982833862},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4975999891757965},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.48190000653266907},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.4142000079154968},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4043000042438507},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.3887999951839447}],"concepts":[{"id":"https://openalex.org/C177454536","wikidata":"https://www.wikidata.org/wiki/Q578290","display_name":"Emphasis (telecommunications)","level":2,"score":0.857200026512146},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6327000260353088},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5800999999046326},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.57669997215271},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5667999982833862},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4975999891757965},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.48190000653266907},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43529999256134033},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4043000042438507},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.3849000036716461},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3822999894618988},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C123406163","wikidata":"https://www.wikidata.org/wiki/Q82042","display_name":"Part of speech","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26460000872612},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2637999951839447}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434710","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1539337024","https://openalex.org/W1975343484","https://openalex.org/W2013319990","https://openalex.org/W2039207705","https://openalex.org/W2052210267","https://openalex.org/W2138435742","https://openalex.org/W2151929554","https://openalex.org/W2154663961","https://openalex.org/W2179428711","https://openalex.org/W2242221029","https://openalex.org/W2295124130","https://openalex.org/W2317567465","https://openalex.org/W2398104528","https://openalex.org/W2403632467","https://openalex.org/W2426479676","https://openalex.org/W2467926808","https://openalex.org/W2539740269","https://openalex.org/W2546732126","https://openalex.org/W2582020873","https://openalex.org/W2735101968","https://openalex.org/W2747874407","https://openalex.org/W2807912760","https://openalex.org/W2918595871","https://openalex.org/W2944569409","https://openalex.org/W3104432410","https://openalex.org/W3125687907","https://openalex.org/W3136454894","https://openalex.org/W3156236758","https://openalex.org/W3182074706","https://openalex.org/W3197580070","https://openalex.org/W3202070718","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3213029956","https://openalex.org/W3214634826","https://openalex.org/W4211010150","https://openalex.org/W4234920300","https://openalex.org/W4281492411","https://openalex.org/W4297841409","https://openalex.org/W4303476210","https://openalex.org/W4319862401","https://openalex.org/W4319862652","https://openalex.org/W4385822474","https://openalex.org/W4385823368","https://openalex.org/W4385993923","https://openalex.org/W4390911524","https://openalex.org/W4391771405","https://openalex.org/W4394773771","https://openalex.org/W4402111696","https://openalex.org/W4404483879","https://openalex.org/W4404781193","https://openalex.org/W4404783121"],"related_works":[],"abstract_inverted_index":{"This":[0,45],"work":[1,23],"investigates":[2],"whether":[3],"modern":[4],"speech":[5,66],"models":[6,67],"are":[7],"sensitive":[8],"to":[9,101],"prosodic":[10,90],"emphasis-whether":[11],"they":[12],"encode":[13],"emphasized":[14,60],"and":[15,59,77],"neutral":[16,58],"words":[17],"in":[18,106],"systematically":[19],"different":[20],"ways.":[21],"Prior":[22],"typically":[24],"relies":[25],"on":[26,64],"isolated":[27],"acoustic":[28],"correlates":[29],"(e.g.,":[30],"pitch,":[31],"duration)":[32],"or":[33],"label":[34],"prediction,":[35,83],"both":[36],"of":[37,43,89],"which":[38],"miss":[39],"the":[40,54],"relational":[41,87],"structure":[42],"emphasis.":[44,91],"paper":[46],"proposes":[47],"a":[48,85,98,116],"residualbased":[49],"framework,":[50],"defining":[51],"emphasis":[52,112],"as":[53,115],"difference":[55],"between":[56],"paired":[57],"word":[61,81],"representations.":[62],"Analysis":[63],"self-supervised":[65],"shows":[68],"that":[69,111,120],"these":[70],"residuals":[71,96],"correlate":[72],"strongly":[73],"with":[74,124],"duration":[75],"changes":[76],"perform":[78],"poorly":[79],"at":[80],"identity":[82],"indicating":[84],"structured,":[86],"encoding":[88],"In":[92],"ASR":[93],"fine-tuned":[94],"models,":[95,108],"occupy":[97],"subspace":[99],"up":[100],"50%":[102],"more":[103,122],"compact":[104],"than":[105],"pre-trained":[107],"further":[109],"suggesting":[110],"is":[113],"encoded":[114],"consistent,":[117],"lowdimensional":[118],"transformation":[119],"becomes":[121],"structured":[123],"task-specific":[125],"learning.":[126]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-12-10T00:00:00"}
