{"id":"https://openalex.org/W3156248677","doi":"https://doi.org/10.1145/3447268","title":"A Q-values Sharing Framework for Multiagent Reinforcement Learning under Budget Constraint","display_name":"A Q-values Sharing Framework for Multiagent Reinforcement Learning under Budget Constraint","publication_year":2020,"publication_date":"2020-11-28","ids":{"openalex":"https://openalex.org/W3156248677","doi":"https://doi.org/10.1145/3447268","mag":"3156248677"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2011.14281","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.14281","pdf_url":"https://arxiv.org/pdf/2011.14281","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"article","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.14281","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053438680","display_name":"Changxi Zhu","orcid":"https://orcid.org/0000-0002-2910-5506"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhu, Changxi","raw_affiliation_strings":["School of Software Engineering, South China University of Technology, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology, Guangdong, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022683388","display_name":"Ho-fung Leung","orcid":"https://orcid.org/0000-0003-4914-2934"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Leung, Ho-fung","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong and Department of Sociology, The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong and Department of Sociology, The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052387391","display_name":"Shuyue Hu","orcid":"https://orcid.org/0000-0002-1908-1344"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hu, Shuyue","raw_affiliation_strings":["Department of Computer Science, National University of Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102008984","display_name":"Yi Cai","orcid":"https://orcid.org/0000-0002-9798-2463"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cai, Yi","raw_affiliation_strings":["School of Software Engineering, South China University of Technology and Key Laboratory of Big Data and Intelligent Robot (South China University of Technology), Ministry of Education, Guangzhou, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Software Engineering, South China University of Technology and Key Laboratory of Big Data and Intelligent Robot (South China University of Technology), Ministry of Education, Guangzhou, Guangdong, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2708,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.67181001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8728100061416626},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8255324363708496},{"id":"https://openalex.org/keywords/budget-constraint","display_name":"Budget constraint","score":0.6802549958229065},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5258365869522095},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.49129146337509155},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38226374983787537},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.12879586219787598},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06405991315841675}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8728100061416626},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8255324363708496},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.6802549958229065},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5258365869522095},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.49129146337509155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38226374983787537},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.12879586219787598},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06405991315841675},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"pmh:oai:arXiv.org:2011.14281","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.14281","pdf_url":"https://arxiv.org/pdf/2011.14281","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.14281","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.14281","pdf_url":"https://arxiv.org/pdf/2011.14281","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8189774080","display_name":null,"funder_award_id":"62076100","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1641379095","https://openalex.org/W2007933500","https://openalex.org/W2030520188","https://openalex.org/W2053241734","https://openalex.org/W2082607648","https://openalex.org/W2095989982","https://openalex.org/W2096145798","https://openalex.org/W2133040789","https://openalex.org/W2482556694","https://openalex.org/W2528816995","https://openalex.org/W2531563875","https://openalex.org/W2539402368","https://openalex.org/W2549225575","https://openalex.org/W2552300487","https://openalex.org/W2608239929","https://openalex.org/W2622408375","https://openalex.org/W2739573821","https://openalex.org/W2764552839","https://openalex.org/W2886362482","https://openalex.org/W2913668833","https://openalex.org/W2913756371","https://openalex.org/W2921596426","https://openalex.org/W2921955147","https://openalex.org/W2963939962","https://openalex.org/W2976108375","https://openalex.org/W2978894944","https://openalex.org/W3099324303"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W83893804","https://openalex.org/W2178649091","https://openalex.org/W2353238953","https://openalex.org/W2496037054","https://openalex.org/W2024895661","https://openalex.org/W2390883384","https://openalex.org/W2054108430","https://openalex.org/W3123594720","https://openalex.org/W2360283705"],"abstract_inverted_index":{"In":[0,24,111],"teacher-student":[1],"framework,":[2],"a":[3,38,98],"more":[4],"experienced":[5],"agent":[6,14],"(teacher)":[7],"helps":[8],"accelerate":[9],"the":[10,50,55,66,83,158],"learning":[11,28,107,133],"of":[12,57,68,157,160],"another":[13,75],"(student)":[15],"by":[16,48],"suggesting":[17],"actions":[18,88,162],"to":[19,33,42,118],"take":[20],"in":[21,129],"certain":[22],"states.":[23],"cooperative":[25,104],"multiagent":[26,132],"reinforcement":[27],"(MARL),":[29],"where":[30],"agents":[31,59,71,106],"need":[32],"cooperate":[34,43],"with":[35,45,73,108],"one":[36,74],"another,":[37],"student":[39],"may":[40,91],"fail":[41],"well":[44],"others":[46],"even":[47],"following":[49],"teachers'":[51],"suggested":[52],"actions,":[53],"as":[54,89],"polices":[56],"all":[58],"are":[60],"ever":[61],"changing":[62],"before":[63],"convergence.":[64],"When":[65],"number":[67],"times":[69],"that":[70,86,138],"communicate":[72],"is":[76,80],"limited":[77,150],"(i.e.,":[78],"there":[79],"budget":[81,109],"constraint),":[82],"advising":[84,100,144,161],"strategy":[85],"uses":[87],"advices":[90],"not":[92],"be":[93],"good":[94],"enough.":[95],"We":[96,126],"propose":[97],"partaker-sharer":[99],"framework":[101],"(PSAF)":[102],"for":[103,120],"MARL":[105],"constraint.":[110],"PSAF,":[112],"each":[113],"Q-learner":[114],"can":[115],"decide":[116],"when":[117],"ask":[119],"Q-values":[121,165],"and":[122,149,152,163],"share":[123],"its":[124],"Q-values.":[125],"perform":[127],"experiments":[128],"three":[130],"typical":[131],"problems.":[134],"Evaluation":[135],"results":[136],"show":[137],"our":[139],"approach":[140],"PSAF":[141],"outperforms":[142],"existing":[143],"methods":[145],"under":[146],"both":[147],"unlimited":[148],"budget,":[151],"we":[153],"give":[154],"an":[155],"analysis":[156],"impact":[159],"sharing":[164],"on":[166],"agents'":[167],"learning.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2021-04-26T00:00:00"}
