{"id":"https://openalex.org/W4402982072","doi":"https://doi.org/10.1109/icme57554.2024.10687845","title":"AdaStyleSpeech: A Fast Stylized Speech Synthesis Model Based on Adaptive Instance Normalization","display_name":"AdaStyleSpeech: A Fast Stylized Speech Synthesis Model Based on Adaptive Instance Normalization","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402982072","doi":"https://doi.org/10.1109/icme57554.2024.10687845"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687845","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101902224","display_name":"Yuming Yang","orcid":"https://orcid.org/0000-0002-6702-2795"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuming Yang","raw_affiliation_strings":["Chongqing University,College of Computer Science,Chongqing,China"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,Chongqing,China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110548125","display_name":"Dongsheng Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Zou","raw_affiliation_strings":["Chongqing University,College of Computer Science,Chongqing,China"],"affiliations":[{"raw_affiliation_string":"Chongqing University,College of Computer Science,Chongqing,China","institution_ids":["https://openalex.org/I158842170"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101902224"],"corresponding_institution_ids":["https://openalex.org/I158842170"],"apc_list":null,"apc_paid":null,"fwci":0.7724,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76763668,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9355999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stylized-fact","display_name":"Stylized fact","score":0.8813825249671936},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8299233317375183},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7816091179847717},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6177216172218323},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4224257469177246}],"concepts":[{"id":"https://openalex.org/C38935604","wikidata":"https://www.wikidata.org/wiki/Q4330363","display_name":"Stylized fact","level":2,"score":0.8813825249671936},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8299233317375183},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7816091179847717},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6177216172218323},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4224257469177246},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687845","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2603777577","https://openalex.org/W2890964092","https://openalex.org/W2903739847","https://openalex.org/W2963539064","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W3015212100","https://openalex.org/W3095936335","https://openalex.org/W3152136404","https://openalex.org/W3197704090","https://openalex.org/W3198533616","https://openalex.org/W4312290555"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2529311304","https://openalex.org/W4248275646","https://openalex.org/W2992609826","https://openalex.org/W3124809058","https://openalex.org/W2552900035","https://openalex.org/W2162875951","https://openalex.org/W2062875858","https://openalex.org/W4380047323"],"abstract_inverted_index":{"Stylized":[0],"speech":[1,10,19,45,71,103,128,137,148],"synthesis":[2,82,119,152],"transforms":[3],"text":[4],"into":[5,69],"a":[6,40,80,108,123],"specific":[7],"style":[8,53,63],"of":[9],"guided":[11],"by":[12],"reference":[13,56,136],"speech.":[14,57],"Despite":[15],"recent":[16],"advancements":[17],"in":[18,23,28,118,126,147],"synthesis,":[20],"challenges":[21],"persist":[22],"this":[24],"domain,":[25],"including":[26],"limitations":[27],"quality,":[29,120],"speed,":[30],"and":[31,62,89,98,143,151],"similarity.":[32],"To":[33],"address":[34],"these":[35],"issues,":[36],"we":[37,77],"introduce":[38],"AdaStyleSpeech,":[39,94],"novel":[41],"model":[42,48,83],"for":[43,107],"stylized":[44,70,127],"synthesis.":[46,129],"This":[47],"can":[49,99],"directly":[50],"extract":[51],"the":[52,105,132,140],"vector":[54],"from":[55],"By":[58],"combining":[59],"textual":[60],"information":[61,88],"vectors,":[64],"AdaStyleSpeech":[65,114],"effectively":[66],"transfers":[67],"content":[68],"using":[72],"adaptive":[73],"instance":[74],"normalization.":[75],"Additionally,":[76],"present":[78],"AdaGANSpeech,":[79],"multi-style":[81],"based":[84],"on":[85,135],"stylistic":[86],"mutual":[87],"generative":[90],"adversarial":[91],"networks.":[92],"Unlike":[93],"it":[95,122],"works":[96],"faster":[97],"generate":[100],"more":[101],"diverse":[102],"without":[104],"need":[106],"reference.":[109],"Experimental":[110],"results":[111],"demonstrate":[112],"that":[113],"attains":[115],"remarkable":[116],"outcomes":[117],"making":[121],"State-of-the-Art":[124],"solution":[125],"AdaGANSpeech":[130],"addresses":[131],"AdaStyleSpeech\u2019s":[133],"reliance":[134],"guidance":[138],"during":[139],"generation":[141],"phase":[142],"exhibits":[144],"notable":[145],"advantages":[146],"diversity,":[149],"clarity,":[150],"speed.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
