{"id":"https://openalex.org/W4412434928","doi":"https://doi.org/10.1007/s10994-025-06823-z","title":"Achieving collective welfare in multi-agent reinforcement learning via suggestion sharing","display_name":"Achieving collective welfare in multi-agent reinforcement learning via suggestion sharing","publication_year":2025,"publication_date":"2025-07-15","ids":{"openalex":"https://openalex.org/W4412434928","doi":"https://doi.org/10.1007/s10994-025-06823-z"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-025-06823-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06823-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06823-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06823-z.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115604796","display_name":"Yue Jin","orcid":"https://orcid.org/0000-0003-3291-2584"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yue Jin","raw_affiliation_strings":["Warwick Manufacturing Group, University of Warwick, Coventry, CV4 7AL, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Warwick Manufacturing Group, University of Warwick, Coventry, CV4 7AL, UK","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012143414","display_name":"Shuangqing Wei","orcid":"https://orcid.org/0000-0001-5913-1441"},"institutions":[{"id":"https://openalex.org/I121820613","display_name":"Louisiana State University","ror":"https://ror.org/05ect4e57","country_code":"US","type":"education","lineage":["https://openalex.org/I121820613"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuangqing Wei","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Louisiana State University, Baton Rouge, LA, 70803, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Louisiana State University, Baton Rouge, LA, 70803, USA","institution_ids":["https://openalex.org/I121820613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010581004","display_name":"Giovanni Montana","orcid":"https://orcid.org/0000-0003-3942-3900"},"institutions":[{"id":"https://openalex.org/I125680101","display_name":"Turing Institute","ror":"https://ror.org/02x2mw849","country_code":"GB","type":"facility","lineage":["https://openalex.org/I125680101"]},{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]},{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Giovanni Montana","raw_affiliation_strings":["Department of Statistics, University of Warwick, Coventry, CV4 7AL, UK","The Alan Turing Institute, 96 Euston Road, London, NW1 2DB, UK","Warwick Manufacturing Group, University of Warwick, Coventry, CV4 7AL, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of Warwick, Coventry, CV4 7AL, UK","institution_ids":["https://openalex.org/I39555362"]},{"raw_affiliation_string":"The Alan Turing Institute, 96 Euston Road, London, NW1 2DB, UK","institution_ids":["https://openalex.org/I125680101","https://openalex.org/I4210128584"]},{"raw_affiliation_string":"Warwick Manufacturing Group, University of Warwick, Coventry, CV4 7AL, UK","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5010581004"],"corresponding_institution_ids":["https://openalex.org/I125680101","https://openalex.org/I39555362","https://openalex.org/I4210128584"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":5.1936,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95264106,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"114","issue":"8","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.9383999705314636,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.651542603969574},{"id":"https://openalex.org/keywords/welfare","display_name":"Welfare","score":0.6056280136108398},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.522050142288208},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45390334725379944},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40956851840019226},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.3423374891281128},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2685982584953308},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.2459508180618286},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.24175500869750977},{"id":"https://openalex.org/keywords/market-economy","display_name":"Market economy","score":0.07608747482299805}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.651542603969574},{"id":"https://openalex.org/C100243477","wikidata":"https://www.wikidata.org/wiki/Q12002092","display_name":"Welfare","level":2,"score":0.6056280136108398},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.522050142288208},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45390334725379944},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40956851840019226},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.3423374891281128},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2685982584953308},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.2459508180618286},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.24175500869750977},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.07608747482299805}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10994-025-06823-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06823-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06823-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:repository.lsu.edu:eecs_pubs-3850","is_oa":true,"landing_page_url":"https://repository.lsu.edu/eecs_pubs/2847","pdf_url":null,"source":{"id":"https://openalex.org/S4210169993","display_name":"Civil War Book Review","issn_l":"1528-6592","issn":["1528-6592"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310315936","host_organization_name":"Louisiana State University","host_organization_lineage":["https://openalex.org/P4310315936"],"host_organization_lineage_names":["Louisiana State University"],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Faculty Publications","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1007/s10994-025-06823-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06823-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06823-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3228942994","display_name":"Turing AI Fellowship: Advancing Multi-Agent Deep Reinforcement Learning for Sequential Decision Making in Real-World Applications","funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412434928.pdf","grobid_xml":"https://content.openalex.org/works/W4412434928.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1988293491","https://openalex.org/W2106713015","https://openalex.org/W2122253967","https://openalex.org/W2157657940","https://openalex.org/W2175723801","https://openalex.org/W2592798481","https://openalex.org/W2594794854","https://openalex.org/W2604873668","https://openalex.org/W2612690371","https://openalex.org/W2623431351","https://openalex.org/W2758442112","https://openalex.org/W2891287243","https://openalex.org/W2891661335","https://openalex.org/W2894976951","https://openalex.org/W2911743772","https://openalex.org/W2913326990","https://openalex.org/W2914351253","https://openalex.org/W2915117209","https://openalex.org/W2963244439","https://openalex.org/W2990138404","https://openalex.org/W2995815314","https://openalex.org/W3010758700","https://openalex.org/W3108171015","https://openalex.org/W3128366769","https://openalex.org/W3132069826","https://openalex.org/W3153787838","https://openalex.org/W3154844346","https://openalex.org/W3163894240","https://openalex.org/W3163926178","https://openalex.org/W3174406242","https://openalex.org/W3200561352","https://openalex.org/W3203898015","https://openalex.org/W3214814008","https://openalex.org/W3217297494","https://openalex.org/W4223424505","https://openalex.org/W4283730654","https://openalex.org/W4285114354","https://openalex.org/W4287155308","https://openalex.org/W4292941517","https://openalex.org/W4312277898","https://openalex.org/W4312741895","https://openalex.org/W4315630270","https://openalex.org/W4320015948","https://openalex.org/W4385764866","https://openalex.org/W6638018090","https://openalex.org/W6680008307","https://openalex.org/W6721101288","https://openalex.org/W6789076383","https://openalex.org/W6803290710"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Abstract":[0],"In":[1],"human":[2],"society,":[3],"the":[4,21,24,72,128,147,163],"conflict":[5,70],"between":[6,149],"self-interest":[7],"and":[8,26,85,151],"collective":[9,63,73,150,164],"well-being":[10],"often":[11],"obstructs":[12],"efforts":[13],"to":[14,54,61,91,94,117,130],"achieve":[15],"shared":[16],"welfare.":[17],"Related":[18],"concepts":[19],"like":[20],"Tragedy":[22],"of":[23],"Commons":[25],"Social":[27],"Dilemmas":[28],"frequently":[29],"manifest":[30],"in":[31],"our":[32,139,170],"daily":[33],"lives.":[34],"As":[35],"artificial":[36],"agents":[37,93,106],"increasingly":[38],"serve":[39],"as":[40],"autonomous":[41],"proxies":[42],"for":[43],"humans,":[44],"we":[45,99],"propose":[46,100],"a":[47,101,144],"novel":[48,102],"multi-agent":[49],"reinforcement":[50],"learning":[51,59],"(MARL)":[52],"method":[53,111],"address":[55],"this":[56],"issue":[57],"-":[58],"policies":[60,86],"maximise":[62],"returns":[64],"even":[65],"when":[66],"individual":[67,152],"agents\u2019":[68,160],"interests":[69],"with":[71,162,174],"one.":[74],"Unlike":[75],"traditional":[76],"cooperative":[77],"MARL":[78,103],"solutions":[79],"that":[80,142,169,176],"involve":[81],"sharing":[82,118,156,182],"rewards,":[83,119],"values,":[84,120],"or":[87,121,180,183],"designing":[88],"intrinsic":[89,132,184],"rewards":[90],"encourage":[92],"learn":[95],"collectively":[96],"optimal":[97],"policies,":[98,122],"approach":[104],"where":[105],"exchange":[107],"action":[108],"suggestions.":[109],"Our":[110,134],"reveals":[112],"less":[113],"private":[114],"information":[115],"compared":[116],"while":[123],"enabling":[124],"effective":[125],"cooperation":[126],"without":[127],"need":[129],"design":[131],"rewards.":[133,185],"algorithm":[135,171],"is":[136],"supported":[137],"by":[138],"theoretical":[140],"analysis":[141],"establishes":[143],"bound":[145],"on":[146,178],"discrepancy":[148],"objectives,":[153],"demonstrating":[154],"how":[155],"suggestions":[157],"can":[158],"align":[159],"behaviours":[161],"objective.":[165],"Experimental":[166],"results":[167],"demonstrate":[168],"performs":[172],"competitively":[173],"baselines":[175],"rely":[177],"value":[179],"policy":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-14T07:44:22.658603","created_date":"2025-10-10T00:00:00"}
