{"id":"https://openalex.org/W7155199467","doi":"https://doi.org/10.48550/arxiv.2604.18982","title":"SAVOIR: Learning Social Savoir-Faire via Shapley-based Reward Attribution","display_name":"SAVOIR: Learning Social Savoir-Faire via Shapley-based Reward Attribution","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155199467","doi":"https://doi.org/10.48550/arxiv.2604.18982"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18982","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134283730","display_name":"Xiachong Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Xiachong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134358703","display_name":"Yi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134294396","display_name":"Xiaocheng Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Xiaocheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134212845","display_name":"Deyi Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Deyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134350091","display_name":"Libo Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Libo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134276655","display_name":"Yangfan Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Yangfan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134310562","display_name":"Lei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Lei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111073904","display_name":"Weitao Ma","orcid":"https://orcid.org/0009-0007-8631-3858"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Weitao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134245694","display_name":"Yuxuan Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134221310","display_name":"Chonghan Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Chonghan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134357609","display_name":"Bing Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Bing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134256451","display_name":"Lingpeng Kong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Lingpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5672000050544739,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5672000050544739,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1890999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.042399998754262924,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.7333999872207642},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.555400013923645},{"id":"https://openalex.org/keywords/interpersonal-communication","display_name":"Interpersonal communication","score":0.5450999736785889},{"id":"https://openalex.org/keywords/axiom","display_name":"Axiom","score":0.4927999973297119},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.44029998779296875},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43320000171661377},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.37209999561309814},{"id":"https://openalex.org/keywords/shapley-value","display_name":"Shapley value","score":0.3467000126838684},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.3346000015735626}],"concepts":[{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.7333999872207642},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5784000158309937},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.555400013923645},{"id":"https://openalex.org/C164850336","wikidata":"https://www.wikidata.org/wiki/Q3685487","display_name":"Interpersonal communication","level":2,"score":0.5450999736785889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5034000277519226},{"id":"https://openalex.org/C167729594","wikidata":"https://www.wikidata.org/wiki/Q17736","display_name":"Axiom","level":2,"score":0.4927999973297119},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43320000171661377},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4016999900341034},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37529999017715454},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.37209999561309814},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.358599990606308},{"id":"https://openalex.org/C199022921","wikidata":"https://www.wikidata.org/wiki/Q240046","display_name":"Shapley value","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3346000015735626},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C91034043","wikidata":"https://www.wikidata.org/wiki/Q223642","display_name":"Interpersonal relationship","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C130064352","wikidata":"https://www.wikidata.org/wiki/Q853725","display_name":"Social relation","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C125773388","wikidata":"https://www.wikidata.org/wiki/Q792542","display_name":"Axiomatic system","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5807268619537354,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Social":[0],"intelligence,":[1],"the":[2,24,115],"ability":[3],"to":[4,33,43,86],"navigate":[5],"complex":[6],"interpersonal":[7],"interactions,":[8],"presents":[9],"a":[10,64],"fundamental":[11],"challenge":[12],"for":[13,94],"language":[14,41],"agents.":[15],"Training":[16],"such":[17],"agents":[18],"via":[19],"reinforcement":[20],"learning":[21],"requires":[22,152],"solving":[23],"credit":[25,103],"assignment":[26],"problem:":[27],"determining":[28],"how":[29],"individual":[30],"utterances":[31],"contribute":[32],"multi-turn":[34],"dialogue":[35],"outcomes.":[36],"Existing":[37],"approaches":[38],"directly":[39],"employ":[40],"models":[42,137,146],"distribute":[44],"episode-level":[45],"rewards,":[46],"yielding":[47],"attributions":[48],"that":[49,119],"are":[50],"retrospective":[51,84],"and":[52,111,140],"lack":[53],"theoretical":[54],"grounding.":[55],"We":[56],"propose":[57],"SAVOIR":[58,120],"(ShApley":[59],"Value":[60],"fOr":[61],"SocIal":[62],"RL),":[63],"novel":[65],"principled":[66],"framework":[67],"grounded":[68],"in":[69],"cooperative":[70],"game":[71],"theory.":[72],"Our":[73],"approach":[74],"combines":[75],"two":[76],"complementary":[77],"principles:":[78],"expected":[79],"utility":[80],"shifts":[81],"evaluation":[82,127],"from":[83],"attribution":[85],"prospective":[87],"valuation,":[88],"capturing":[89],"an":[90],"utterance's":[91],"strategic":[92],"potential":[93],"enabling":[95],"favorable":[96],"future":[97],"trajectories;":[98],"Shapley":[99],"values":[100],"ensure":[101],"fair":[102],"distribution":[104],"with":[105,129],"axiomatic":[106],"guarantees":[107],"of":[108],"efficiency,":[109],"symmetry,":[110],"marginality.":[112],"Experiments":[113],"on":[114],"SOTOPIA":[116],"benchmark":[117],"demonstrate":[118],"achieves":[121],"new":[122],"state-of-the-art":[123],"performance":[124],"across":[125],"all":[126],"settings,":[128],"our":[130],"7B":[131],"model":[132],"matching":[133],"or":[134],"exceeding":[135],"proprietary":[136],"including":[138],"GPT-4o":[139],"Claude-3.5-Sonnet.":[141],"Notably,":[142],"even":[143],"large":[144],"reasoning":[145],"consistently":[147],"underperform,":[148],"suggesting":[149],"social":[150],"intelligence":[151],"qualitatively":[153],"different":[154],"capabilities":[155],"than":[156],"analytical":[157],"reasoning.":[158]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-23T00:00:00"}
