{"id":"https://openalex.org/W4415333486","doi":"https://doi.org/10.48550/arxiv.2506.15107","title":"I Know You're Listening: Adaptive Voice for HRI","display_name":"I Know You're Listening: Adaptive Voice for HRI","publication_year":2025,"publication_date":"2025-06-18","ids":{"openalex":"https://openalex.org/W4415333486","doi":"https://doi.org/10.48550/arxiv.2506.15107"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2506.15107","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.15107","pdf_url":"https://arxiv.org/pdf/2506.15107","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.15107","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064388924","display_name":"Paige Tutt\u00f6s\u00ed","orcid":"https://orcid.org/0000-0002-2082-8548"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tutt\u00f6s\u00ed, Paige","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5064388924"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9351999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9351999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/clarity","display_name":"CLARITY","score":0.7260000109672546},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5464000105857849},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.46230000257492065},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.41929998993873596},{"id":"https://openalex.org/keywords/expressivity","display_name":"Expressivity","score":0.35600000619888306},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.34360000491142273}],"concepts":[{"id":"https://openalex.org/C2777146004","wikidata":"https://www.wikidata.org/wiki/Q14949826","display_name":"CLARITY","level":2,"score":0.7260000109672546},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6061000227928162},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5464000105857849},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.46230000257492065},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.41929998993873596},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36809998750686646},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36640000343322754},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3578000068664551},{"id":"https://openalex.org/C92811239","wikidata":"https://www.wikidata.org/wiki/Q20998670","display_name":"Expressivity","level":2,"score":0.35600000619888306},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.3377000093460083},{"id":"https://openalex.org/C162947575","wikidata":"https://www.wikidata.org/wiki/Q2005645","display_name":"Social robot","level":5,"score":0.3066999912261963},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2506.15107","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.15107","pdf_url":"https://arxiv.org/pdf/2506.15107","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.15107","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.15107","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.15107","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.15107","pdf_url":"https://arxiv.org/pdf/2506.15107","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415333486.pdf","grobid_xml":"https://content.openalex.org/works/W4415333486.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"the":[1,39,62,163,235,241],"use":[2,78,215],"of":[3,41,51,75,90,121,175,196,237],"social":[4,139],"robots":[5,42],"for":[6,21,38,43,64,118,189,201,243,252],"language":[7,22,27,44],"teaching":[8,23,45,53],"has":[9],"been":[10],"explored,":[11],"there":[12],"remains":[13],"limited":[14,102],"work":[15],"on":[16],"a":[17,29,65,72,88,133,206,256],"task-specific":[18],"synthesized":[19],"voices":[20,55,145],"robots.":[24],"Given":[25],"that":[26,86,151,198,234,254],"is":[28],"verbal":[30],"task,":[31],"this":[32,49,110],"gap":[33],"may":[34],"have":[35],"severe":[36],"consequences":[37],"effectiveness":[40],"tasks.":[46],"We":[47,60,128,149,180,204,232],"address":[48,61],"lack":[50],"L2":[52,190,213,244],"robot":[54,69],"through":[56],"three":[57],"contributions:":[58],"1.":[59],"need":[63],"lightweight":[66],"and":[67,116,138,154,159,169,226,246,276],"expressive":[68,84,122],"voice.":[70],"Using":[71],"fine-tuned":[73],"version":[74],"Matcha-TTS,":[76],"we":[77,108],"emoji":[79],"prompting":[80],"to":[81,131,136,142,210,218,258,271],"create":[82,181],"an":[83,182,248],"voice":[85,95,111,135,165],"shows":[87],"range":[89],"expressivity":[91],"over":[92],"time.":[93],"The":[94],"can":[96],"run":[97],"in":[98,146,157,230,285],"real":[99],"time":[100],"with":[101,186,222],"compute":[103],"resources.":[104],"Through":[105],"case":[106],"studies,":[107],"found":[109,150,233,270],"more":[112,167,173,273],"expressive,":[113],"socially":[114],"appropriate,":[115],"suitable":[117],"long":[119],"periods":[120],"speech,":[123],"such":[124],"as":[125],"storytelling.":[126],"2.":[127],"explore":[129],"how":[130,212],"adapt":[132],"robot's":[134,164],"physical":[137],"ambient":[140],"environments":[141,161],"deploy":[143],"our":[144],"various":[147],"locations.":[148],"increasing":[152],"pitch":[153,155],"rate":[156],"noisy":[158],"high-energy":[160],"makes":[162,170],"appear":[166],"appropriate":[168],"it":[171],"seem":[172],"aware":[174],"its":[176],"current":[177],"environment.":[178],"3.":[179],"English":[183],"TTS":[184],"system":[185],"improved":[187],"clarity":[188,250,267],"listeners":[191,245],"using":[192],"known":[193],"linguistic":[194],"properties":[195],"vowels":[197,229,238,260,264],"are":[199],"difficult":[200],"these":[202,286],"listeners.":[203],"used":[205],"data-driven,":[207],"perception-based":[208],"approach":[209],"understand":[211],"speakers":[214],"duration":[216,236],"cues":[217],"interpret":[219],"challenging":[220,287],"words":[221],"minimal":[223,289],"tense":[224,259],"(long)":[225],"lax":[227,263],"(short)":[228],"English.":[231],"strongly":[239],"influences":[240],"perception":[242],"created":[247],"\"L2":[249],"mode\"":[251],"Matcha-TTS":[253,280],"applies":[255],"lengthening":[257],"while":[261,281],"leaving":[262],"unchanged.":[265],"Our":[266],"mode":[268],"was":[269],"be":[272],"respectful,":[274],"intelligible,":[275],"encouraging":[277],"than":[278],"base":[279],"reducing":[282],"transcription":[283],"errors":[284],"tense/lax":[288],"pairs.":[290]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-19T00:00:00"}
