{"id":"https://openalex.org/W7148561625","doi":"https://doi.org/10.1109/asru65441.2025.11434687","title":"Enhancing In-the-Wild Speech Emotion Conversion with Resynthesis-based Duration Modeling","display_name":"Enhancing In-the-Wild Speech Emotion Conversion with Resynthesis-based Duration Modeling","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148561625","doi":"https://doi.org/10.1109/asru65441.2025.11434687"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434687","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434687","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064507749","display_name":"Navin Raj Prabhu","orcid":"https://orcid.org/0000-0002-0263-3077"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Navin Raj Prabhu","raw_affiliation_strings":["University of Hamburg,Signal Processing,Hamburg,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102520429","display_name":"Danilo de Oliveira","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Danilo De Oliveira","raw_affiliation_strings":["University of Hamburg,Signal Processing,Hamburg,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125147667","display_name":"Nale Lehmann-Willenbrock","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I70451448","display_name":"HAW Hamburg","ror":"https://ror.org/00fkqwx76","country_code":"DE","type":"education","lineage":["https://openalex.org/I70451448"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nale Lehmann-Willenbrock","raw_affiliation_strings":["University of Hamburg,Industrial and Organizational Psychology,Hamburg,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Industrial and Organizational Psychology,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I70451448","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132830101","display_name":"Timo Gerkmann","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Timo Gerkmann","raw_affiliation_strings":["University of Hamburg,Signal Processing,Hamburg,Germany"],"affiliations":[{"raw_affiliation_string":"University of Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064507749"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I4210136595","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81943368,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.7785999774932861,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.7785999774932861,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.5666000247001648},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.2777000069618225},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.2761000096797943},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.2743000090122223}],"concepts":[{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.5666000247001648},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46700000762939453},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4510999917984009},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4449999928474426},{"id":"https://openalex.org/C548259974","wikidata":"https://www.wikidata.org/wiki/Q569965","display_name":"Audiology","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.296999990940094},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2761000096797943},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2743000090122223},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2732999920845032}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434687","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434687","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2146334809","https://openalex.org/W2149628368","https://openalex.org/W2313339984","https://openalex.org/W2742542661","https://openalex.org/W2803098682","https://openalex.org/W2938833595","https://openalex.org/W2963035245","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W3015719316","https://openalex.org/W3016136182","https://openalex.org/W3096831136","https://openalex.org/W3140429000","https://openalex.org/W3142644187","https://openalex.org/W3161695192","https://openalex.org/W3209984917","https://openalex.org/W4205742757","https://openalex.org/W4221147462","https://openalex.org/W4286747238","https://openalex.org/W4323896824","https://openalex.org/W4361994820","https://openalex.org/W4372341094","https://openalex.org/W4378782001","https://openalex.org/W4379619502","https://openalex.org/W4385574033","https://openalex.org/W4385823186","https://openalex.org/W4386076005","https://openalex.org/W4392904147","https://openalex.org/W4402111615","https://openalex.org/W4406521093","https://openalex.org/W4408345780"],"related_works":[],"abstract_inverted_index":{"Speech":[0],"Emotion":[1],"Conversion":[2],"aims":[3],"to":[4,45,70],"modify":[5],"the":[6,43,47,87,90,100],"emotion":[7],"expressed":[8],"in":[9,27,99],"input":[10],"speech":[11,68,77,115],"while":[12,117],"preserving":[13],"lexical":[14],"content":[15,63],"and":[16,38,74,113],"speaker":[17],"identity.":[18],"Recently,":[19],"generative":[20],"modeling":[21,58,93],"approaches":[22],"have":[23],"shown":[24],"promising":[25],"results":[26,84],"changing":[28],"local":[29],"acoustic":[30],"properties":[31],"such":[32],"as":[33],"fundamental":[34],"frequency,":[35],"spectral":[36],"envelope":[37],"energy,":[39],"but":[40],"often":[41],"lack":[42],"ability":[44],"control":[46],"duration":[48,57,69,92],"of":[49,67,89],"sounds.":[50],"To":[51],"address":[52],"this,":[53],"we":[54],"propose":[55],"a":[56],"framework":[59,94],"using":[60,80],"resynthesis-based":[61],"discrete":[62],"representations,":[64],"enabling":[65],"modification":[66],"reflect":[71],"target":[72],"emotions":[73,108,119],"achieve":[75],"controllable":[76],"rates":[78],"without":[79],"parallel":[81],"data.":[82],"Experimental":[83],"reveal":[85],"that":[86,106],"inclusion":[88],"proposed":[91],"significantly":[95],"enhances":[96],"emotional":[97],"expressiveness,":[98],"in-the-wild":[101],"MSP-Podcast":[102],"dataset.":[103],"Analyses":[104],"show":[105],"low-arousal":[107],"correlate":[109],"with":[110],"longer":[111],"durations":[112],"slower":[114],"rates,":[116],"high-arousal":[118],"produce":[120],"shorter,":[121],"faster":[122],"speech.":[123]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
