{"id":"https://openalex.org/W4402571298","doi":"https://doi.org/10.1109/snpd61259.2024.10673914","title":"Analysis of Subjective Evaluation of Al Speech Synthesis Emotional Expressiveness","display_name":"Analysis of Subjective Evaluation of Al Speech Synthesis Emotional Expressiveness","publication_year":2024,"publication_date":"2024-07-05","ids":{"openalex":"https://openalex.org/W4402571298","doi":"https://doi.org/10.1109/snpd61259.2024.10673914"},"language":"en","primary_location":{"id":"doi:10.1109/snpd61259.2024.10673914","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snpd61259.2024.10673914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACIS 27th International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing (SNPD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107287781","display_name":"Yihua Ao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yihua Ao","raw_affiliation_strings":["University of China Beijing,School of Music and Recording Arts Communication,China"],"affiliations":[{"raw_affiliation_string":"University of China Beijing,School of Music and Recording Arts Communication,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114227682","display_name":"Miaotong Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miaotong Yuan","raw_affiliation_strings":["University of China Beijing,School of Music and Recording Arts Communication,China"],"affiliations":[{"raw_affiliation_string":"University of China Beijing,School of Music and Recording Arts Communication,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5107287781"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6813,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75410774,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"56","last_page":"61"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.45660001039505005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.45660001039505005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7074853777885437},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4863046109676361},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4613068401813507},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40180081129074097},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33800971508026123}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7074853777885437},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4863046109676361},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4613068401813507},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40180081129074097},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33800971508026123}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/snpd61259.2024.10673914","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snpd61259.2024.10673914","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACIS 27th International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel/Distributed Computing (SNPD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1519761837","https://openalex.org/W1971362524","https://openalex.org/W1973378890","https://openalex.org/W2030067664","https://openalex.org/W2069924379","https://openalex.org/W2121329867","https://openalex.org/W2154611638","https://openalex.org/W2889783224","https://openalex.org/W2976159681","https://openalex.org/W3045907799","https://openalex.org/W4245744384","https://openalex.org/W4245845544","https://openalex.org/W4376457142","https://openalex.org/W4392904830"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W127416991"],"abstract_inverted_index":{"AI":[0,41,80,168],"speech":[1,42,81,169,178,219],"synthesis":[2,19],"is":[3,21,88,136,176],"widely":[4],"used":[5,89],"in":[6,94,123,133,190,207,255,276],"audio":[7],"books,":[8],"movie":[9],"and":[10,15,50,69,108,129,151,187,206,231,274,281],"TV":[11],"dubbing,":[12],"virtual":[13],"human":[14,264],"other":[16],"fields.":[17],"Speech":[18],"technology":[20],"no":[22],"longer":[23],"limited":[24],"to":[25,30,38,90,155,159,247],"simply":[26],"\u201cunderstandable\u201d,":[27],"the":[28,46,53,61,75,101,110,113,121,146,160,163,171,177,183,191,208,212,216,225,229,237,252,256,272,277],"ability":[29,78,104,166,240],"express":[31],"emotion":[32,267],"has":[33],"become":[34],"a":[35,95],"key":[36],"factor":[37,175],"measure":[39],"whether":[40],"can":[43,234],"horizontally":[44],"expand":[45],"application\u2019s":[47,54],"actual":[48],"scene":[49],"vertically":[51],"enhance":[52],"potential.":[55],"In":[56],"this":[57],"article,":[58],"we":[59],"use":[60,192],"MUSHRA":[62,84],"(Multi":[63],"Stimulus":[64],"test":[65],"with":[66,128,195,266],"Hidden":[67],"Reference":[68],"Anchor)":[70],"subjective":[71,85],"evaluation":[72,98,114],"experiment":[73,86],"for":[74,145,251],"emotional":[76,102,147,164,196,238],"expression":[77,103,148,165,239],"of":[79,97,105,125,149,167,193,218,227,241,258,279],"synthesis.":[82],"The":[83,243],"method":[87],"score":[91],"different":[92,106,143],"timbres":[93],"series":[96],"dimensions,":[99],"compare":[100],"timbres,":[107],"explore":[109],"relationship":[111],"between":[112],"dimensions.":[115],"Our":[116],"experimental":[117],"results":[118,244],"show":[119],"that":[120],"difference":[122],"perception":[124],"voice":[126,209,265],"timbre":[127,153,184,202],"without":[130],"professional":[131],"training":[132],"vocal":[134],"broadcasting":[135],"not":[137],"obvious.":[138],"Different":[139],"application":[140],"scenarios":[141,194],"have":[142],"requirements":[144],"timbre,":[150],"certain":[152],"needs":[154],"be":[156,204,221],"selected":[157],"according":[158],"usage.":[161],"For":[162],"synthesis,":[170],"most":[172],"important":[173],"influencing":[174],"speed":[179,217],"adaptation,":[180,185],"followed":[181],"by":[182],"naturalness":[186,230],"fluency.":[188],"Therefore,":[189],"accuracy":[197],"requirements,":[198],"those":[199],"highly":[200],"adaptive":[201],"should":[203,220],"preferred,":[205],"adjustment":[210],"at":[211],"post":[213],"production":[214],"stage,":[215],"adjusted":[222],"appropriately":[223],"under":[224],"premise":[226],"ensuring":[228],"fluency,":[232],"which":[233],"effectively":[235],"improve":[236],"timbre.":[242],"are":[245],"expected":[246],"provide":[248],"new":[249],"insights":[250],"related":[253],"research":[254],"fields":[257,278],"artificial":[259],"intelligence,":[260],"deep":[261],"learning":[262],"on":[263],"study,":[268],"as":[269,271],"well":[270],"exploration":[273],"practice":[275],"phonetics":[280],"intelligent":[282],"speech.":[283]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
