{"id":"https://openalex.org/W4404998025","doi":"https://doi.org/10.1145/3706110","title":"Credible Negotiation for Multi-agent Reinforcement Learning in Long-term Coordination","display_name":"Credible Negotiation for Multi-agent Reinforcement Learning in Long-term Coordination","publication_year":2024,"publication_date":"2024-12-04","ids":{"openalex":"https://openalex.org/W4404998025","doi":"https://doi.org/10.1145/3706110"},"language":"en","primary_location":{"id":"doi:10.1145/3706110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3706110","source":{"id":"https://openalex.org/S16632050","display_name":"ACM Transactions on Autonomous and Adaptive Systems","issn_l":"1556-4665","issn":["1556-4665","1556-4703"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Autonomous and Adaptive Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3706110","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101645356","display_name":"Tianlong Gu","orcid":"https://orcid.org/0000-0002-1593-1292"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianlong Gu","raw_affiliation_strings":["Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China and Engineering Research Center of Trustworthy AI, Jinan University, Guangzhou, China","Guilin University of Electronic Technology, Engineering Research Center of Trustworthy AI, Ministry of Education, Jinan University, China"],"raw_orcid":"https://orcid.org/0000-0002-1593-1292","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China and Engineering Research Center of Trustworthy AI, Jinan University, Guangzhou, China","institution_ids":["https://openalex.org/I5343935"]},{"raw_affiliation_string":"Guilin University of Electronic Technology, Engineering Research Center of Trustworthy AI, Ministry of Education, Jinan University, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114990994","display_name":"Taihang Zhi","orcid":"https://orcid.org/0009-0002-3612-2862"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Taihang Zhi","raw_affiliation_strings":["Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0009-0002-3612-2862","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","institution_ids":["https://openalex.org/I5343935"]},{"raw_affiliation_string":"Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080696960","display_name":"Xuguang Bao","orcid":"https://orcid.org/0000-0001-9950-0053"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuguang Bao","raw_affiliation_strings":["Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-9950-0053","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","institution_ids":["https://openalex.org/I5343935"]},{"raw_affiliation_string":"Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015299890","display_name":"Liang Chang","orcid":"https://orcid.org/0000-0002-7262-4707"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Chang","raw_affiliation_strings":["Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-7262-4707","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin, China","institution_ids":["https://openalex.org/I5343935"]},{"raw_affiliation_string":"Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2219,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83711894,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"20","issue":"1","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9333999752998352,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8726224899291992},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7784767150878906},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.725182831287384},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.6046369671821594},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.591475784778595},{"id":"https://openalex.org/keywords/coordination-game","display_name":"Coordination game","score":0.5023941993713379},{"id":"https://openalex.org/keywords/correlated-equilibrium","display_name":"Correlated equilibrium","score":0.5003559589385986},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4910156726837158},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.46184083819389343},{"id":"https://openalex.org/keywords/negotiation","display_name":"Negotiation","score":0.4328697621822357},{"id":"https://openalex.org/keywords/equilibrium-selection","display_name":"Equilibrium selection","score":0.3473702073097229},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.34077394008636475},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.30663084983825684},{"id":"https://openalex.org/keywords/repeated-game","display_name":"Repeated game","score":0.3018590211868286},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2951715588569641},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.24125677347183228},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12624463438987732}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8726224899291992},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7784767150878906},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.725182831287384},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.6046369671821594},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.591475784778595},{"id":"https://openalex.org/C107257861","wikidata":"https://www.wikidata.org/wiki/Q656316","display_name":"Coordination game","level":2,"score":0.5023941993713379},{"id":"https://openalex.org/C163630976","wikidata":"https://www.wikidata.org/wiki/Q964667","display_name":"Correlated equilibrium","level":5,"score":0.5003559589385986},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4910156726837158},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.46184083819389343},{"id":"https://openalex.org/C199776023","wikidata":"https://www.wikidata.org/wiki/Q202875","display_name":"Negotiation","level":2,"score":0.4328697621822357},{"id":"https://openalex.org/C164407509","wikidata":"https://www.wikidata.org/wiki/Q5384490","display_name":"Equilibrium selection","level":4,"score":0.3473702073097229},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.34077394008636475},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.30663084983825684},{"id":"https://openalex.org/C202556891","wikidata":"https://www.wikidata.org/wiki/Q1584646","display_name":"Repeated game","level":3,"score":0.3018590211868286},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2951715588569641},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.24125677347183228},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12624463438987732},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3706110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3706110","source":{"id":"https://openalex.org/S16632050","display_name":"ACM Transactions on Autonomous and Adaptive Systems","issn_l":"1556-4665","issn":["1556-4665","1556-4703"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Autonomous and Adaptive Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3706110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3706110","source":{"id":"https://openalex.org/S16632050","display_name":"ACM Transactions on Autonomous and Adaptive Systems","issn_l":"1556-4665","issn":["1556-4665","1556-4703"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Autonomous and Adaptive Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8327241554","display_name":null,"funder_award_id":"U22A2099","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404998025.pdf"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1502765764","https://openalex.org/W1542941925","https://openalex.org/W1964167056","https://openalex.org/W2089415692","https://openalex.org/W2099618002","https://openalex.org/W2120846115","https://openalex.org/W2145339207","https://openalex.org/W2552300487","https://openalex.org/W2617547828","https://openalex.org/W2756196406","https://openalex.org/W2758442112","https://openalex.org/W2802589227","https://openalex.org/W2810602713","https://openalex.org/W2908261578","https://openalex.org/W2960876848","https://openalex.org/W2963485523","https://openalex.org/W2963658727","https://openalex.org/W2963762747","https://openalex.org/W2971027212","https://openalex.org/W2973525135","https://openalex.org/W2982316857","https://openalex.org/W2997072274","https://openalex.org/W2998279575","https://openalex.org/W3099518626","https://openalex.org/W3100019413","https://openalex.org/W3107615218","https://openalex.org/W3117236156","https://openalex.org/W3174032342","https://openalex.org/W3200561352","https://openalex.org/W3203930949","https://openalex.org/W3206560080","https://openalex.org/W4289100326","https://openalex.org/W4289751798","https://openalex.org/W4297945402","https://openalex.org/W4301501993","https://openalex.org/W4322768657","https://openalex.org/W6894328656"],"related_works":["https://openalex.org/W2778030638","https://openalex.org/W2181559844","https://openalex.org/W4365460918","https://openalex.org/W2187345782","https://openalex.org/W3203771163","https://openalex.org/W2590634914","https://openalex.org/W3187966265","https://openalex.org/W4280496625","https://openalex.org/W3196383554","https://openalex.org/W2964096659"],"abstract_inverted_index":{"The":[0,15,146],"coordination":[1,60],"of":[2,6,18,55,123,178],"multi-agent":[3],"is":[4,48,81,102,118,149,155],"one":[5],"the":[7,35,51,58,64,73,100,106,110,116,121,126,137,152,158,164,176],"critical":[8],"problems":[9],"in":[10,38,57,97,112,125,180],"Multi-agent":[11],"Reinforcement":[12],"Learning":[13],"(MARL).":[14],"traditional":[16,86],"methods":[17],"MARL":[19],"focus":[20],"on":[21,136],"finding":[22],"a":[23,45,82,90,130,171],"stochastically":[24],"acceptable":[25],"solution":[26,80],"called":[27],"Nash":[28],"Equilibrium":[29,141],"(NE)":[30],"for":[31,50,85],"all":[32],"agents":[33,124,179],"from":[34],"Markov":[36],"Game":[37],"which":[39,174],"multiple":[40],"equilibria":[41],"exist.":[42],"However,":[43],"learning":[44,76,133,153],"fair":[46,91,172],"equilibrium":[47],"crucial":[49],"sustainability":[52],"and":[53,115,151],"stability":[54],"collaboration":[56],"long-term":[59],"game,":[61,114],"especially":[62],"when":[63],"leadership":[65],"competition":[66],"exists.":[67],"In":[68],"this":[69],"article,":[70],"we":[71,104],"propose":[72,105],"bi-level":[74,131],"reinforcement":[75],"method":[77,134,167],"N-Bi-AC,":[78],"whose":[79],"Pareto":[83],"improvement":[84],"NE,":[87,173],"to":[88,108,119,170],"choose":[89],"Equilibrium.":[92],"There":[93],"are":[94],"two":[95],"parts":[96],"our":[98],"method,":[99],"first":[101],"that":[103,163],"Negotiator":[107],"determine":[109],"leader":[111],"stage":[113],"other":[117],"update":[120],"Q-value":[122],"game":[127,183],"by":[128],"using":[129],"actor-critic":[132],"based":[135],"Joint":[138],"Mixed":[139],"Strategy":[140],"Q-learning":[142],"algorithm":[143,154],"(JMSE":[144],"Q-learning).":[145],"convergence":[147],"proof":[148],"given,":[150],"compared":[156],"with":[157],"state-of-the-art":[159],"algorithms.":[160],"We":[161],"found":[162],"proposed":[165],"N-Bi-AC":[166],"successfully":[168],"converged":[169],"guarantees":[175],"fairness":[177],"different":[181],"matrix":[182],"environments.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
