{"id":"https://openalex.org/W4415821770","doi":"https://doi.org/10.1109/ro-man63969.2025.11217645","title":"EmojiVoice: Towards long-term controllable expressivity in robot speech","display_name":"EmojiVoice: Towards long-term controllable expressivity in robot speech","publication_year":2025,"publication_date":"2025-08-25","ids":{"openalex":"https://openalex.org/W4415821770","doi":"https://doi.org/10.1109/ro-man63969.2025.11217645"},"language":null,"primary_location":{"id":"doi:10.1109/ro-man63969.2025.11217645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ro-man63969.2025.11217645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064388924","display_name":"Paige Tutt\u00f6s\u00ed","orcid":"https://orcid.org/0000-0002-2082-8548"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Paige Tutt\u00f6s\u00ed","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Burnaby,Canada,8888"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Burnaby,Canada,8888","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103054907","display_name":"Shivam Mehta","orcid":"https://orcid.org/0000-0002-1886-681X"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Shivam Mehta","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Speech Music and Hearing"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Speech Music and Hearing","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117519064","display_name":"Zachary Syvenky","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zachary Syvenky","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Burnaby,Canada,8888"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Burnaby,Canada,8888","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114390793","display_name":"Bermet Burkanova","orcid":null},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Bermet Burkanova","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Burnaby,Canada,8888"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Burnaby,Canada,8888","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091432739","display_name":"Gustav Eje Henter","orcid":"https://orcid.org/0000-0002-1643-1054"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Gustav Eje Henter","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Speech Music and Hearing"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Speech Music and Hearing","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054803584","display_name":"Angelica Lim","orcid":"https://orcid.org/0000-0001-9288-0380"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Angelica Lim","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,Burnaby,Canada,8888"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,Burnaby,Canada,8888","institution_ids":["https://openalex.org/I18014758"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3299954,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"657","last_page":"664"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.7235999703407288,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.7235999703407288,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.052799999713897705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.04740000143647194,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/expressivity","display_name":"Expressivity","score":0.9273999929428101},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.7533000111579895},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5177000164985657},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5056999921798706},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4677000045776367},{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.4309000074863434},{"id":"https://openalex.org/keywords/emoji","display_name":"Emoji","score":0.42160001397132874},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.38199999928474426}],"concepts":[{"id":"https://openalex.org/C92811239","wikidata":"https://www.wikidata.org/wiki/Q20998670","display_name":"Expressivity","level":2,"score":0.9273999929428101},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.7533000111579895},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.728600025177002},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.544700026512146},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5056999921798706},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4677000045776367},{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.4309000074863434},{"id":"https://openalex.org/C2779247141","wikidata":"https://www.wikidata.org/wiki/Q1049294","display_name":"Emoji","level":3,"score":0.42160001397132874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39989998936653137},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C10090317","wikidata":"https://www.wikidata.org/wiki/Q7551030","display_name":"Social cue","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.3499999940395355},{"id":"https://openalex.org/C30539005","wikidata":"https://www.wikidata.org/wiki/Q1066689","display_name":"Human communication","level":2,"score":0.3384000062942505},{"id":"https://openalex.org/C162947575","wikidata":"https://www.wikidata.org/wiki/Q2005645","display_name":"Social robot","level":5,"score":0.3368000090122223},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2930999994277954},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C2780829048","wikidata":"https://www.wikidata.org/wiki/Q1624720","display_name":"Conversation analysis","level":3,"score":0.2759999930858612},{"id":"https://openalex.org/C130064352","wikidata":"https://www.wikidata.org/wiki/Q853725","display_name":"Social relation","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.25929999351501465},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2554999887943268},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ro-man63969.2025.11217645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ro-man63969.2025.11217645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 34th IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W3209839","https://openalex.org/W1964469912","https://openalex.org/W1983356675","https://openalex.org/W2006044072","https://openalex.org/W2066064791","https://openalex.org/W2098218620","https://openalex.org/W2171121512","https://openalex.org/W2502125069","https://openalex.org/W2520111737","https://openalex.org/W2623107180","https://openalex.org/W2782545884","https://openalex.org/W2963733582","https://openalex.org/W2964283259","https://openalex.org/W2980428304","https://openalex.org/W2997207789","https://openalex.org/W3010193774","https://openalex.org/W3096104971","https://openalex.org/W3150572638","https://openalex.org/W3182074706","https://openalex.org/W4200442117","https://openalex.org/W4288359812","https://openalex.org/W4293218370","https://openalex.org/W4323536841","https://openalex.org/W4385822941","https://openalex.org/W4385993887","https://openalex.org/W4388182168","https://openalex.org/W4390926469","https://openalex.org/W4391709315","https://openalex.org/W4392903192","https://openalex.org/W4392931276","https://openalex.org/W4396941304","https://openalex.org/W4397124197","https://openalex.org/W4400679824","https://openalex.org/W4401414235","https://openalex.org/W4402111427","https://openalex.org/W4402112010","https://openalex.org/W4402112244","https://openalex.org/W4402115961","https://openalex.org/W4402672020","https://openalex.org/W4405087661"],"related_works":[],"abstract_inverted_index":{"Humans":[0],"vary":[1],"their":[2,13],"expressivity":[3,44,87,138],"when":[4],"speaking":[5],"for":[6],"extended":[7],"periods":[8],"to":[9,19,41,52,70,82,98],"maintain":[10],"engagement":[11],"with":[12,22,112],"listener.":[14],"Although":[15],"social":[16,68,77],"robots":[17],"tend":[18],"be":[20],"deployed":[21],"\"expressive\"":[23],"joyful":[24],"voices,":[25],"they":[26,49],"lack":[27],"this":[28],"long-term":[29],"variation":[30],"found":[31,128],"in":[32,45,101,145,155],"human":[33,46],"speech.":[34],"Foundation":[35],"model":[36],"text-to-speech":[37,63],"systems":[38],"are":[39,50],"beginning":[40],"mimic":[42],"the":[43,94,135,156],"speech,":[47],"but":[48,149],"difficult":[51],"deploy":[53],"offline":[54],"on":[55,76,88],"robots.":[56,78],"We":[57,79,103,127],"present":[58],"EmojiVoice,":[59],"a":[60,89,109,113,117,142,146],"free,":[61],"customizable":[62],"(TTS)":[64],"toolkit":[65],"that":[66,129],"allows":[67],"roboticists":[69],"build":[71],"temporally":[72],"variable,":[73],"expressive":[74,150],"speech":[75,100,140],"introduce":[80],"emoji-prompting":[81],"allow":[83],"fine-grained":[84],"control":[85],"of":[86,139],"phase":[90],"level":[91],"and":[92,120,137],"use":[93,158],"lightweight":[95],"Matcha-TTS":[96],"backbone":[97],"generate":[99],"real-time.":[102],"explore":[104],"three":[105],"case":[106],"studies:":[107],"(1)":[108],"scripted":[110],"conversation":[111],"robot":[114],"assistant,":[115],"(2)":[116],"storytelling":[118,147],"robot,":[119],"(3)":[121],"an":[122],"autonomous":[123],"speech-to-speech":[124],"interactive":[125],"agent.":[126],"using":[130],"varied":[131],"emoji":[132],"prompting":[133],"improved":[134],"perception":[136],"over":[141],"long":[143],"period":[144],"task,":[148],"voice":[151],"was":[152],"not":[153],"preferred":[154],"assistant":[157],"case.":[159]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-03T00:00:00"}
