{"id":"https://openalex.org/W3003032574","doi":"https://doi.org/10.1109/robio49542.2019.8961464","title":"The Effect of Different Types of Internal Rewards in Distributed Multi-Agent Deep Reinforcement Learning","display_name":"The Effect of Different Types of Internal Rewards in Distributed Multi-Agent Deep Reinforcement Learning","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3003032574","doi":"https://doi.org/10.1109/robio49542.2019.8961464","mag":"3003032574"},"language":"en","primary_location":{"id":"doi:10.1109/robio49542.2019.8961464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio49542.2019.8961464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101680982","display_name":"Hongda Zhang","orcid":"https://orcid.org/0000-0002-2109-5972"},"institutions":[{"id":"https://openalex.org/I142078773","display_name":"Shenyang Institute of Automation","ror":"https://ror.org/00ft6nj33","country_code":"CN","type":"facility","lineage":["https://openalex.org/I142078773","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongda Zhang","raw_affiliation_strings":["State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I142078773","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101773900","display_name":"Decai Li","orcid":"https://orcid.org/0000-0003-4140-3861"},"institutions":[{"id":"https://openalex.org/I142078773","display_name":"Shenyang Institute of Automation","ror":"https://ror.org/00ft6nj33","country_code":"CN","type":"facility","lineage":["https://openalex.org/I142078773","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Decai Li","raw_affiliation_strings":["State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I142078773","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100654736","display_name":"Yuqing He","orcid":"https://orcid.org/0000-0001-5067-1169"},"institutions":[{"id":"https://openalex.org/I142078773","display_name":"Shenyang Institute of Automation","ror":"https://ror.org/00ft6nj33","country_code":"CN","type":"facility","lineage":["https://openalex.org/I142078773","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqing He","raw_affiliation_strings":["State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Robotics, Shenyang Institute of Automation Chinese Academy of Sciences, Shenyang, Liaoning, China","institution_ids":["https://openalex.org/I142078773","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101680982"],"corresponding_institution_ids":["https://openalex.org/I142078773","https://openalex.org/I19820366"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59829689,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2890","last_page":"2895"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8702590465545654},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.708716630935669},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6428717970848083},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6282095909118652},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5575476884841919},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4606919288635254},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11400625109672546},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10967046022415161},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08183911442756653}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8702590465545654},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.708716630935669},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6428717970848083},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6282095909118652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5575476884841919},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4606919288635254},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11400625109672546},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10967046022415161},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08183911442756653},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/robio49542.2019.8961464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/robio49542.2019.8961464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2026615874","https://openalex.org/W2602275733","https://openalex.org/W2617547828","https://openalex.org/W2749807327","https://openalex.org/W2756196406","https://openalex.org/W2963871073","https://openalex.org/W4243385754","https://openalex.org/W4293862243","https://openalex.org/W4297789683","https://openalex.org/W4299802797","https://openalex.org/W6707268500","https://openalex.org/W6726754200","https://openalex.org/W6735650757","https://openalex.org/W6738796088","https://openalex.org/W6743367460","https://openalex.org/W6743756900","https://openalex.org/W6744537943"],"related_works":["https://openalex.org/W2366107444","https://openalex.org/W4388145910","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W2381570729","https://openalex.org/W1976205134","https://openalex.org/W4248336175","https://openalex.org/W2031260042","https://openalex.org/W2391445434"],"abstract_inverted_index":{"Distributed":[0],"multiagent":[1,69,103,114,138],"reinforcement":[2,70,139,221],"learning":[3,51,98,104,135,146,175,179],"in":[4,68,215],"the":[5,13,19,23,27,36,41,44,50,57,97,119,134,145,153,157,160,166,173,178,182,195,210,216],"same":[6],"environment":[7,45],"is":[8,29,148,162,188],"prohibitively":[9],"hard,":[10],"due":[11],"to":[12],"difficulty":[14],"of":[15,22,32,100,110,121,137,155,181,186,194,212,218],"assigning":[16],"credit":[17,58],"for":[18,86,92,113,159,209],"individual":[20,93],"actions":[21],"agent,":[24],"especially":[25],"when":[26,144],"agent":[28],"a":[30,33,84,101,207],"member":[31],"team.":[34],"Meanwhile,":[35],"sparse":[37,61,167,197],"delayed":[38,62],"reward":[39,63,79,131,168,187,213],"about":[40],"team":[42,115,161],"from":[43],"such":[46,169],"as":[47,83,170,190,192],"winning":[48,87,171],"makes":[49],"progress":[52],"more":[53],"challenging.":[54],"To":[55,95,199],"solve":[56],"assignment":[59],"and":[60,88,117,152,177],"problems":[64],"which":[65],"are":[66],"common":[67],"learning,":[71,140],"researchers":[72],"usually":[73],"construct":[74],"or":[75],"learn":[76],"an":[77],"internal":[78,111,130],"signal":[80],"that":[81,127,143,193],"acts":[82],"proxy":[85],"provides":[89],"denser":[90],"rewards":[91,112],"agent.":[94],"improve":[96,133],"effect":[99,120,136,180],"typical":[102],"task,":[105],"we":[106],"conducted":[107],"three":[108],"types":[109],"members":[116],"evaluated":[118],"these":[122],"rewards.":[123],"The":[124],"results":[125,204],"show":[126],"not":[128,149,163,189],"all":[129],"can":[132,205],"it":[141],"seems":[142],"task":[147,158],"very":[150,164],"complex":[151],"time":[154],"finishing":[156],"long,":[165],"have":[172],"best":[174],"effect,":[176],"other":[183],"two":[184],"forms":[185],"good":[191],"simple":[196],"reward.":[198],"some":[200],"extent,":[201],"our":[202],"research":[203],"provide":[206],"reference":[208],"design":[211],"function":[214],"application":[217],"distributed":[219],"multi-agent":[220],"learning.":[222]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
