{"id":"https://openalex.org/W3208665898","doi":"https://doi.org/10.1109/sped53181.2021.9587366","title":"Project V\u0101c: Can a Text-to-Speech Engine Generate Human Sentiments?","display_name":"Project V\u0101c: Can a Text-to-Speech Engine Generate Human Sentiments?","publication_year":2021,"publication_date":"2021-10-13","ids":{"openalex":"https://openalex.org/W3208665898","doi":"https://doi.org/10.1109/sped53181.2021.9587366","mag":"3208665898"},"language":"en","primary_location":{"id":"doi:10.1109/sped53181.2021.9587366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped53181.2021.9587366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065082910","display_name":"Shivam Kulkarni","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shivam Kulkarni","raw_affiliation_strings":["Illinois Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060297911","display_name":"Luis Barbado","orcid":null},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luis Barbado","raw_affiliation_strings":["Illinois Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003078437","display_name":"Jordan Hosier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jordan Hosier","raw_affiliation_strings":["Northwestern University","Vail Systems, Inc"],"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]},{"raw_affiliation_string":"Vail Systems, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065142782","display_name":"Yu Zhou","orcid":"https://orcid.org/0000-0003-4389-3972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Zhou","raw_affiliation_strings":["Vail Systems, Inc"],"affiliations":[{"raw_affiliation_string":"Vail Systems, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027325992","display_name":"Siddharth Rajagopalan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Siddharth Rajagopalan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5067975955","display_name":"Vijay K. Gurbani","orcid":"https://orcid.org/0000-0002-9386-1533"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vijay K. Gurbani","raw_affiliation_strings":["Illinois Institute of Technology","Vail Systems, Inc"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology","institution_ids":["https://openalex.org/I180949307"]},{"raw_affiliation_string":"Vail Systems, Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065082910"],"corresponding_institution_ids":["https://openalex.org/I180949307"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14056795,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"4","issue":null,"first_page":"103","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8298250436782837},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.6630212664604187},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.570712685585022},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5532203316688538},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5418740510940552},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5361834168434143},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4863939583301544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47214391827583313},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46622753143310547},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.45342588424682617},{"id":"https://openalex.org/keywords/part-of-speech","display_name":"Part of speech","score":0.44198980927467346},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.41068243980407715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8298250436782837},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.6630212664604187},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.570712685585022},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5532203316688538},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5418740510940552},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5361834168434143},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4863939583301544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47214391827583313},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46622753143310547},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.45342588424682617},{"id":"https://openalex.org/C123406163","wikidata":"https://www.wikidata.org/wiki/Q82042","display_name":"Part of speech","level":2,"score":0.44198980927467346},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.41068243980407715},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sped53181.2021.9587366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sped53181.2021.9587366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Conference on Speech Technology and Human-Computer Dialogue (SpeD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1585912603","https://openalex.org/W2061900096","https://openalex.org/W2085662862","https://openalex.org/W2128837546","https://openalex.org/W2144005487","https://openalex.org/W2146334809","https://openalex.org/W2556418146","https://openalex.org/W2592109500","https://openalex.org/W2753840835","https://openalex.org/W2772380162","https://openalex.org/W2803193013","https://openalex.org/W2903795704","https://openalex.org/W2905562398","https://openalex.org/W2929728775","https://openalex.org/W2936162287","https://openalex.org/W2982645239","https://openalex.org/W3032925342","https://openalex.org/W3045823842","https://openalex.org/W3081192838","https://openalex.org/W3100511085","https://openalex.org/W3156627980","https://openalex.org/W4233906183","https://openalex.org/W6681409903","https://openalex.org/W6756813850"],"related_works":["https://openalex.org/W2355553914","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W4320472397","https://openalex.org/W2548633793","https://openalex.org/W2401269021","https://openalex.org/W2941935829","https://openalex.org/W2145654520","https://openalex.org/W2588413092","https://openalex.org/W4247866851"],"abstract_inverted_index":{"Sentiment":[0],"analysis":[1,69],"is":[2,13,28,73,115,192,235,250],"an":[3,61,87,203],"important":[4],"area":[5,27],"of":[6,22,50,76,80,86,103,164,178,187,206],"natural":[7],"language":[8],"processing":[9],"(NLP)":[10],"research,":[11],"and":[12,140,241,269],"increasingly":[14],"being":[15],"performed":[16],"by":[17,106],"machine":[18],"learning":[19],"models.":[20],"Much":[21],"the":[23,45,51,74,109,141,146,162,222,232,247,254,258,270],"work":[24],"in":[25,91,145,153,176,220],"this":[26,92,113,265],"concentrated":[29],"on":[30,70,209,237],"extracting":[31],"sentiment":[32,68,212],"from":[33,60,100,253,276],"textual":[34,40],"data":[35],"sources.":[36],"Clearly":[37],"however,":[38],"a":[39,101,165,189,199,211,238],"source":[41,245],"does":[42,273],"not":[43,251,274],"convey":[44],"pitch,":[46],"prosody,":[47],"or":[48,183],"power":[49],"spoken":[52],"sentiment,":[53,81],"making":[54],"it":[55,194],"attractive":[56],"to":[57,96,121,128,169,224],"extract":[58],"sentiments":[59],"audio":[62,71,98,110],"stream.":[63],"A":[64],"fundamental":[65],"prerequisite":[66],"for":[67],"streams":[72],"availability":[75],"reliable":[77],"acoustic":[78],"representation":[79],"appropriately":[82],"labeled.":[83],"The":[84,185],"lack":[85],"existing,":[88],"large-scale":[89],"dataset":[90,249],"form":[93],"forces":[94],"researchers":[95],"curate":[97],"datasets":[99,134],"variety":[102],"sources,":[104],"often":[105,137],"manually":[107],"labeling":[108,147],"corpus.":[111],"However,":[112],"approach":[114,266],"inherently":[116],"subjective.":[117],"What":[118],"appears":[119],"\u201cpositive\u201d":[120],"one":[122,177],"human":[123],"listener":[124],"may":[125],"appear":[126],"\u201cneutral\u201d":[127],"another.":[129],"Such":[130],"challenges":[131],"yield":[132],"sub-optimal":[133],"that":[135,193,201,264],"are":[136],"class":[138],"imbalanced,":[139],"inevitable":[142],"biases":[143],"present":[144],"process":[148],"can":[149,195,215],"permeate":[150],"these":[151,158],"models":[152],"problematic":[154],"ways.":[155],"To":[156],"mitigate":[157],"disadvantages,":[159],"we":[160],"propose":[161],"use":[163],"text-to-speech":[166],"(TTS)":[167],"engine":[168,191],"generate":[170],"labeled":[171,207],"synthetic":[172],"voice":[173],"samples":[174],"rendered":[175],"three":[179],"sentiments:":[180],"positive,":[181],"negative,":[182],"neutral.":[184],"advantage":[186],"using":[188],"TTS":[190],"be":[196,216],"abstracted":[197],"as":[198,257],"function":[200],"generates":[202],"infinite":[204],"set":[205],"samples,":[208],"which":[210,225],"detection":[213],"model":[214,234,272],"trained.":[217],"We":[218],"investigate,":[219],"particular,":[221],"extent":[223],"such":[226],"training":[227,259],"exhibits":[228],"acceptable":[229],"accuracy":[230],"when":[231],"induced":[233,271],"tested":[236],"separate,":[239],"independent":[240],"identically":[242],"distributed":[243],"speech":[244],"(i.e.,":[246],"test":[248],"drawn":[252],"same":[255],"distribution":[256],"dataset).":[260],"Our":[261],"results":[262],"indicate":[263],"shows":[267],"promise":[268],"suffer":[275],"underspecification.":[277]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
