{"id":"https://openalex.org/W4406312018","doi":"https://doi.org/10.1109/lsp.2025.3528359","title":"PRESENT: Zero-Shot Text-to-Prosody Control","display_name":"PRESENT: Zero-Shot Text-to-Prosody Control","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4406312018","doi":"https://doi.org/10.1109/lsp.2025.3528359"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2025.3528359","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3528359","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002258189","display_name":"Perry Lam","orcid":"https://orcid.org/0000-0001-9607-0756"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Perry Lam","raw_affiliation_strings":["Singapore University of Technology and Design, Singapore","Singapore University of Technology &amp; Design, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-9607-0756","affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design, Singapore","institution_ids":["https://openalex.org/I152815399"]},{"raw_affiliation_string":"Singapore University of Technology &amp; Design, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102856166","display_name":"Huayun Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Huayun Zhang","raw_affiliation_strings":["Institute of Infocomm Research, A*STAR, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-2528-273X","affiliations":[{"raw_affiliation_string":"Institute of Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014190404","display_name":"Nancy F. Chen","orcid":"https://orcid.org/0000-0003-0872-5877"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Nancy F. Chen","raw_affiliation_strings":["Institute of Infocomm Research, A*STAR, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-0872-5877","affiliations":[{"raw_affiliation_string":"Institute of Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001303929","display_name":"Berrak \u015ei\u015fman","orcid":"https://orcid.org/0000-0001-8078-3305"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Berrak Sisman","raw_affiliation_strings":["Johns Hopkins University, Baltimore, MD, USA"],"raw_orcid":"https://orcid.org/0000-0001-8078-3305","affiliations":[{"raw_affiliation_string":"Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069548004","display_name":"Dorien Herremans","orcid":"https://orcid.org/0000-0001-8607-1640"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dorien Herremans","raw_affiliation_strings":["Singapore University of Technology and Design, Singapore","Singapore University of Technology &amp; Design, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design, Singapore","institution_ids":["https://openalex.org/I152815399"]},{"raw_affiliation_string":"Singapore University of Technology &amp; Design, Singapore","institution_ids":["https://openalex.org/I152815399"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5002258189"],"corresponding_institution_ids":["https://openalex.org/I152815399"],"apc_list":null,"apc_paid":null,"fwci":2.1733,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86897115,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"32","issue":null,"first_page":"776","last_page":"780"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9589999914169312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6770145893096924},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5866167545318604},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.5785393118858337},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5270329117774963},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4554228186607361},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40705394744873047},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38084372878074646},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1387292444705963}],"concepts":[{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6770145893096924},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5866167545318604},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.5785393118858337},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5270329117774963},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4554228186607361},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40705394744873047},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38084372878074646},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1387292444705963},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2025.3528359","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3528359","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W135244556","https://openalex.org/W2023701920","https://openalex.org/W2102146461","https://openalex.org/W2380489056","https://openalex.org/W2428180336","https://openalex.org/W2471520273","https://openalex.org/W2964138190","https://openalex.org/W2972802841","https://openalex.org/W3016021263","https://openalex.org/W3016160783","https://openalex.org/W3048217770","https://openalex.org/W3090474612","https://openalex.org/W3095012670","https://openalex.org/W3097297926","https://openalex.org/W3162948689","https://openalex.org/W3196543367","https://openalex.org/W3197324626","https://openalex.org/W3198123658","https://openalex.org/W3201257217","https://openalex.org/W4224612669","https://openalex.org/W4283067311","https://openalex.org/W4287854807","https://openalex.org/W4296068767","https://openalex.org/W4309874973","https://openalex.org/W4372260474","https://openalex.org/W4372267432","https://openalex.org/W4381786045","https://openalex.org/W4385764360","https://openalex.org/W4398152753","https://openalex.org/W6750489868","https://openalex.org/W6762643587","https://openalex.org/W6778823374","https://openalex.org/W6802169104","https://openalex.org/W6810423869","https://openalex.org/W6847363464","https://openalex.org/W6850334629"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2355553914","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W187117048"],"abstract_inverted_index":{"Current":[0],"strategies":[1],"for":[2,84,98],"achieving":[3],"fine-grained":[4],"prosody":[5,43,123],"control":[6],"in":[7,45,109],"speech":[8],"synthesis":[9],"entail":[10],"extracting":[11],"additional":[12],"style":[13],"embeddings":[14],"or":[15,37],"adopting":[16],"more":[17],"complex":[18],"architectures.":[19],"To":[20,112],"enable":[21],"zero-shot":[22,60],"application":[23],"of":[24,79,124],"pretrained":[25],"text-to-speech":[26],"(TTS)":[27],"models,":[28],"we":[29,103,116],"present":[30],"PRESENT":[31,119],"(PRosody":[32],"Editing":[33],"without":[34],"Style":[35],"Embeddings":[36],"New":[38],"Training),":[39],"which":[40],"exploits":[41],"explicit":[42],"prediction":[44],"FastSpeech2-based":[46],"models":[47],"by":[48,95],"modifying":[49],"the":[50,91,122,152],"inference":[51],"process":[52],"directly.":[53],"We":[54,73,142],"apply":[55],"our":[56,156],"text-to-prosody":[57],"framework":[58],"to":[59,129],"language":[61,134],"transfer":[62],"using":[63],"a":[64,107,132],"JETS":[65,153],"model":[66],"exclusively":[67],"trained":[68],"on":[69],"English":[70],"LJSpeech":[71],"data.":[72],"obtain":[74],"character":[75],"error":[76],"rates":[77],"(CER)":[78],"12.8%,":[80],"18.7%":[81],"and":[82,87,126,147,158,171],"5.9%":[83],"German,":[85],"Hungarian":[86],"Spanish":[88],"respectively,":[89],"beating":[90],"previous":[92],"state-of-the-art":[93],"CER":[94,146,150],"over":[96],"2\u00d7":[97],"all":[99],"three":[100],"languages.":[101],"Furthermore,":[102],"allow":[104],"subphoneme-level":[105],"control,":[106],"first":[108],"this":[110],"field.":[111],"evaluate":[113],"its":[114],"effectiveness,":[115],"show":[117],"that":[118],"can":[120],"improve":[121],"questions,":[125],"use":[127],"it":[128],"generate":[130],"Mandarin,":[131],"tonal":[133],"where":[135],"vowel":[136],"pitch":[137],"varies":[138],"at":[139],"subphoneme":[140],"level.":[141],"attain":[143],"25.3%":[144],"hanzi":[145],"13.0%":[148],"pinyin":[149],"with":[151],"model.":[154],"All":[155],"code":[157],"audio":[159],"samples":[160],"<xref":[161],"ref-type=\"fn\"":[162],"rid=\"fn1\"":[163],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[164,168],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><sup>1</sup></xref>":[165],"<fn":[166],"id=\"fn1\"":[167],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><label><sup>1</sup></label>":[169],"<uri>https://github.com/iamanigeeit/present</uri>":[170],"<uri>https://present2024.web.app/</uri>":[172],"</fn>":[173],"are":[174],"available":[175],"online.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
