{"id":"https://openalex.org/W4416798680","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249104","title":"Expressive Prompting: Improving Emotion Intensity and Speaker Consistency in Zero-Shot TTS","display_name":"Expressive Prompting: Improving Emotion Intensity and Speaker Consistency in Zero-Shot TTS","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4416798680","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249104"},"language":null,"primary_location":{"id":"doi:10.1109/apsipaasc65261.2025.11249104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100427213","display_name":"Haoyu Wang","orcid":"https://orcid.org/0009-0006-2213-9443"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoyu Wang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028353824","display_name":"Chunyu Qiang","orcid":"https://orcid.org/0009-0007-2290-3074"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyu Qiang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049261843","display_name":"Tianrui Wang","orcid":"https://orcid.org/0009-0005-1517-9589"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Wang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cheng Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Gong","raw_affiliation_strings":["Institute of Artificial Intelligence, China Telecom,China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, China Telecom,China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210136246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086017661","display_name":"Yu Jiang","orcid":"https://orcid.org/0000-0002-0728-5673"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Jiang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006690717","display_name":"Yuheng Lu","orcid":"https://orcid.org/0000-0002-2767-0894"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuheng Lu","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374052","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0001-5773-9090"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]},{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["Kuaishou Technology Co., Ltd,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology Co., Ltd,Beijing,China","institution_ids":["https://openalex.org/I4210155967","https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101745213","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-8094-6861"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017251198","display_name":"Jianwu Dang","orcid":"https://orcid.org/0000-0002-9237-4821"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwu Dang","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences,Guangdong,China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100427213"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19216208,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"555","last_page":"560"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8562999963760376,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8562999963760376,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.009499999694526196,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.609499990940094},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.498199999332428},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.49810001254081726},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.46470001339912415},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.40230000019073486},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.36899998784065247},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.3515999913215637}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7508000135421753},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6797000169754028},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.609499990940094},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.498199999332428},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.49810001254081726},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.46470001339912415},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.36899998784065247},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35109999775886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.334199994802475},{"id":"https://openalex.org/C541956065","wikidata":"https://www.wikidata.org/wiki/Q2250680","display_name":"Speech error","level":3,"score":0.33320000767707825},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.31139999628067017},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.28279998898506165},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C2986627078","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker identification","level":3,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc65261.2025.11249104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7867606361","display_name":null,"funder_award_id":"U23B2053,62176182","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1965069145","https://openalex.org/W2102298245","https://openalex.org/W2962788625","https://openalex.org/W2970641574","https://openalex.org/W3161480375","https://openalex.org/W3163573274","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4255053369","https://openalex.org/W4283067311","https://openalex.org/W4312238419","https://openalex.org/W4372260402","https://openalex.org/W4382202688","https://openalex.org/W4384302749","https://openalex.org/W4389518664","https://openalex.org/W4392909624","https://openalex.org/W4401070302","https://openalex.org/W4402111427","https://openalex.org/W4402301063","https://openalex.org/W4402669711"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2],"speech":[3,69,86,176],"synthesis":[4],"have":[5],"enabled":[6],"large":[7],"language":[8],"model":[9,123,151],"(LLM)-based":[10],"systems":[11],"to":[12,37,49,152,174,187],"perform":[13],"zero-shot":[14,192],"generation":[15,40],"with":[16,160,177],"controllable":[17],"content,":[18],"timbre,":[19],"speaker":[20,56,129,184],"identity,":[21,185],"and":[22,59,106,131,137,182,190,197],"emotion":[23],"through":[24],"input":[25,163],"prompts.":[26],"As":[27],"a":[28,77,120,148],"result,":[29],"these":[30],"models":[31],"heavily":[32],"rely":[33],"on":[34],"prompt":[35,44,79,97,138,155,173],"design":[36],"guide":[38],"the":[39,89,117,135,141,154,161],"process.":[41],"However,":[42],"existing":[43],"selection":[45,80],"methods":[46],"often":[47],"fail":[48],"ensure":[50],"that":[51,156,168],"prompts":[52],"contain":[53],"sufficiently":[54],"stable":[55,191],"identity":[57],"cues":[58],"appropriate":[60],"emotional":[61,132,180],"intensity":[62],"indicators,":[63],"which":[64],"are":[65],"crucial":[66],"for":[67,84],"expressive":[68,85,189],"synthesis.":[70,87],"To":[71],"address":[72],"this":[73],"challenge,":[74],"we":[75,94,146],"propose":[76],"two-stage":[78],"strategy":[81,170],"specifically":[82],"designed":[83],"In":[88,140],"static":[90],"stage":[91,143],"(before":[92],"synthesis),":[93,145],"first":[95],"evaluate":[96],"candidates":[98,118],"using":[99],"pitch-based":[100],"prosodic":[101],"features,":[102],"perceptual":[103],"audio":[104],"quality,":[105],"text-emotion":[107],"coherence":[108],"scores":[109],"evaluated":[110],"by":[111,124],"an":[112],"LLM.":[113],"We":[114],"further":[115],"assess":[116],"under":[119],"specific":[121],"TTS":[122,193],"measuring":[125],"character":[126],"error":[127],"rate,":[128],"similarity,":[130],"similarity":[133,150],"between":[134],"synthesized":[136],"speech.":[139],"dynamic":[142],"(during":[144],"use":[147],"textual":[149],"select":[153],"is":[157],"most":[158],"aligned":[159],"current":[162],"text.":[164],"Experimental":[165],"results":[166],"demonstrate":[167],"our":[169],"effectively":[171],"selects":[172],"synthesize":[175],"both":[178],"high-intensity":[179],"expression":[181],"robust":[183],"leading":[186],"more":[188],"performance.":[194],"Audio":[195],"samples":[196],"codes":[198],"will":[199],"be":[200],"available":[201],"at":[202],"https://whyrrrrun.github.io/ExpPro.github.io/.":[203]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-11-28T00:00:00"}
