{"id":"https://openalex.org/W7134900877","doi":"https://doi.org/10.48550/arxiv.2603.09307","title":"Paralinguistic Emotion-Aware Validation Timing Detection in Japanese Empathetic Spoken Dialogue","display_name":"Paralinguistic Emotion-Aware Validation Timing Detection in Japanese Empathetic Spoken Dialogue","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134900877","doi":"https://doi.org/10.48550/arxiv.2603.09307"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09307","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09307","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09307","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113124502","display_name":"Zi Haur Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Zi Haur","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028251075","display_name":"Yahui Fu","orcid":"https://orcid.org/0000-0001-8379-5993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yahui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128699609","display_name":"Yuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Yuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128744884","display_name":"Tatsuya Kawahara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kawahara, Tatsuya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.002899999963119626,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paralanguage","display_name":"Paralanguage","score":0.9253000020980835},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4765999913215637},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.39430001378059387},{"id":"https://openalex.org/keywords/feeling","display_name":"Feeling","score":0.3869999945163727},{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.3732999861240387},{"id":"https://openalex.org/keywords/dyad","display_name":"Dyad","score":0.33090001344680786},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.31029999256134033},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3068000078201294}],"concepts":[{"id":"https://openalex.org/C133378560","wikidata":"https://www.wikidata.org/wiki/Q1753225","display_name":"Paralanguage","level":2,"score":0.9253000020980835},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5583999752998352},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4878000020980835},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4765999913215637},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.42250001430511475},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.39430001378059387},{"id":"https://openalex.org/C122980154","wikidata":"https://www.wikidata.org/wiki/Q205555","display_name":"Feeling","level":2,"score":0.3869999945163727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38019999861717224},{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.3732999861240387},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37070000171661377},{"id":"https://openalex.org/C2777716012","wikidata":"https://www.wikidata.org/wiki/Q5318389","display_name":"Dyad","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C2988148770","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion detection","level":3,"score":0.2946999967098236},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C145633318","wikidata":"https://www.wikidata.org/wiki/Q207125","display_name":"Nonverbal communication","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26019999384880066},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.2599000036716461},{"id":"https://openalex.org/C2779885105","wikidata":"https://www.wikidata.org/wiki/Q182263","display_name":"Empathy","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09307","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09307","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09307","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09307","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Emotional":[0,130],"Validation":[1],"is":[2,35],"a":[3,63,93,101,178],"psychotherapy":[4],"communication":[5],"technique":[6],"that":[7,144,158],"involves":[8],"recognizing,":[9],"understanding,":[10],"and":[11,17,22,43,58,65,84,99,112,139,142],"explicitly":[12],"acknowledging":[13],"another":[14],"person's":[15],"feelings":[16],"actions,":[18],"which":[19],"strengthens":[20],"alliance":[21],"reduces":[23],"negative":[24],"affect.":[25],"To":[26],"maximize":[27],"the":[28,52,115,119,128,145],"emotional":[29,59],"support":[30],"provided":[31],"by":[32],"validation,":[33],"it":[34,39],"crucial":[36],"to":[37,90,170],"deliver":[38],"with":[40,164],"appropriate":[41],"timing":[42,49,70,122],"frequency.":[44],"This":[45],"study":[46],"investigates":[47],"validation":[48,69,121,173],"detection":[50,71,123],"from":[51],"speech":[53,103,153,160],"perspective.":[54],"Leveraging":[55],"both":[56],"paralinguistic":[57],"information,":[60],"we":[61,78],"propose":[62],"paralinguistic-":[64],"emotion-aware":[66],"model":[67,117],"for":[68],"without":[72],"relying":[73],"on":[74,86,118,127],"textual":[75],"context.":[76],"Specifically,":[77],"first":[79],"conduct":[80],"continued":[81],"self-supervised":[82],"training":[83,140],"fine-tuning":[85],"different":[87],"HuBERT":[88],"backbones":[89],"obtain":[91],"(i)":[92],"paralinguistics-aware":[94],"Self-Supervised":[95],"Learning":[96],"(SSL)":[97],"encoder":[98],"(ii)":[100],"multi-task":[102],"emotion":[104],"classification":[105],"encoder.":[106],"We":[107],"then":[108],"fuse":[109],"these":[110],"encoders":[111],"further":[113],"fine-tune":[114],"combined":[116],"downstream":[120],"task.":[124],"Experimental":[125],"evaluations":[126],"TUT":[129],"Storytelling":[131],"Corpus":[132],"(TESC)":[133],"compare":[134],"multiple":[135],"models,":[136],"fusion":[137],"mechanisms,":[138],"strategies,":[141],"demonstrate":[143],"proposed":[146],"approach":[147],"achieves":[148],"significant":[149],"improvements":[150],"over":[151],"conventional":[152],"baselines.":[154],"Our":[155],"results":[156],"indicate":[157],"non-linguistic":[159],"cues,":[161],"when":[162,172],"integrated":[163],"affect-related":[165],"representations,":[166],"carry":[167],"sufficient":[168],"signal":[169],"decide":[171],"should":[174],"be":[175],"expressed,":[176],"offering":[177],"speech-first":[179],"pathway":[180],"toward":[181],"more":[182],"empathetic":[183],"human-robot":[184],"interaction.":[185]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-03-12T00:00:00"}
