{"id":"https://openalex.org/W2160913278","doi":"https://doi.org/10.1109/iros.2004.1389904","title":"Multi-agent reinforcement learning: using macro actions to learn a mating task","display_name":"Multi-agent reinforcement learning: using macro actions to learn a mating task","publication_year":2005,"publication_date":"2005-04-12","ids":{"openalex":"https://openalex.org/W2160913278","doi":"https://doi.org/10.1109/iros.2004.1389904","mag":"2160913278"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2004.1389904","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389904","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068305732","display_name":"Stefan Elfwing","orcid":"https://orcid.org/0000-0001-6689-1000"},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["JP","SE"],"is_corresponding":true,"raw_author_name":"S. Elfwing","raw_affiliation_strings":["CREST, Japan Science and Technology Agency, Kyoto, Japan","Centre for Autonomous Systems, Numerical Analysis and Computer Science, Royal Institute of Technology, Stockholm, Sweden","Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan"],"affiliations":[{"raw_affiliation_string":"CREST, Japan Science and Technology Agency, Kyoto, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Centre for Autonomous Systems, Numerical Analysis and Computer Science, Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]},{"raw_affiliation_string":"Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"E. Uchibe","raw_affiliation_strings":["CREST, Japan Science and Technology Agency, Kyoto, Japan","Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan"],"affiliations":[{"raw_affiliation_string":"CREST, Japan Science and Technology Agency, Kyoto, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004840638","display_name":"Kenji Doya","orcid":"https://orcid.org/0000-0002-2446-6820"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I142637625","display_name":"Okinawa Institute of Science and Technology Graduate University","ror":"https://ror.org/02qg15b79","country_code":"JP","type":"education","lineage":["https://openalex.org/I142637625"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Doya","raw_affiliation_strings":["CREST, Japan Science and Technology Agency, Kyoto, Japan","Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan"],"affiliations":[{"raw_affiliation_string":"CREST, Japan Science and Technology Agency, Kyoto, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Department of Computational Neurobiology, ATR Computational Neuroscience Laboratories, Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"Neural Computation Project, Initial Research Project, Okinawa Institute of Science and Technology, Gushikawa, Okinawa, Japan","institution_ids":["https://openalex.org/I142637625"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066237365","display_name":"Henrik I. Christensen","orcid":"https://orcid.org/0000-0002-7465-7502"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"H.I. Christensen","raw_affiliation_strings":["Centre for Autonomous Systems, Numerical Analysis and Computer Science, Royal Institute of Technology, Stockholm, Sweden"],"affiliations":[{"raw_affiliation_string":"Centre for Autonomous Systems, Numerical Analysis and Computer Science, Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5068305732"],"corresponding_institution_ids":["https://openalex.org/I142637625","https://openalex.org/I4210086780","https://openalex.org/I4210129730","https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":6.8504,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96760451,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"4","issue":null,"first_page":"3164","last_page":"3169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10100","display_name":"Metaheuristic Optimization Algorithms Research","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8715906143188477},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.8086323738098145},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7477465867996216},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6684295535087585},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.5584250688552856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5474607348442078},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5414396524429321},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5040711164474487},{"id":"https://openalex.org/keywords/embodied-agent","display_name":"Embodied agent","score":0.47979360818862915},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4442690312862396},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4209336042404175},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.3860507607460022},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3584122657775879},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09798601269721985}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8715906143188477},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.8086323738098145},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7477465867996216},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6684295535087585},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.5584250688552856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5474607348442078},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5414396524429321},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5040711164474487},{"id":"https://openalex.org/C103683099","wikidata":"https://www.wikidata.org/wiki/Q5370102","display_name":"Embodied agent","level":3,"score":0.47979360818862915},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4442690312862396},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4209336042404175},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.3860507607460022},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3584122657775879},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09798601269721985},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros.2004.1389904","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2004.1389904","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2004 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) (IEEE Cat. No.04CH37566)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.66.7062","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.66.7062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.nada.kth.se/~hic/hic-papers/iros04-elfwing.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1513468570","https://openalex.org/W1542941925","https://openalex.org/W1572462007","https://openalex.org/W1605202242","https://openalex.org/W1681475672","https://openalex.org/W1976051517","https://openalex.org/W2109910161","https://openalex.org/W2121863487","https://openalex.org/W2167261463","https://openalex.org/W2171426878","https://openalex.org/W2535652371","https://openalex.org/W4214717370","https://openalex.org/W6728376379"],"related_works":["https://openalex.org/W1601503673","https://openalex.org/W1487956045","https://openalex.org/W1592154258","https://openalex.org/W1527882169","https://openalex.org/W4388039923","https://openalex.org/W1596535966","https://openalex.org/W2096246921","https://openalex.org/W2145935766","https://openalex.org/W2136562935","https://openalex.org/W2185743013"],"abstract_inverted_index":{"Standard":[0],"reinforcement":[1,44],"learning":[2,10,50,97,163,179],"methods":[3],"are":[4,82],"inefficient":[5],"and":[6,107,156,196],"often":[7],"inadequate":[8],"for":[9,86,114,177],"cooperative":[11,125],"multi-agent":[12],"tasks.":[13],"For":[14],"these":[15],"kinds":[16],"of":[17,21,48,101,149,162],"tasks":[18],"the":[19,35,49,55,60,96,102,110,115,130,141,150,160,167,178,183],"behavior":[20,111,189],"one":[22,89],"agent":[23],"strongly":[24],"depends":[25],"on":[26],"dynamic":[27],"interaction":[28,36],"with":[29,34,37,180],"other":[30,61,116],"agents,":[31],"not":[32],"only":[33],"a":[38,75,124,172,186],"static":[39],"environment":[40],"as":[41],"in":[42,84,154,159,190,193],"standard":[43],"learning.":[45],"The":[46,92],"success":[47],"is":[51,129,145],"therefore":[52],"coupled":[53],"to":[54,58,69,136,170],"agents'":[56],"ability":[57],"predict":[59],"agents":[62,168,184],"behaviors.":[63],"In":[64,118,175],"this":[65,71,119],"study":[66,120],"we":[67,121],"try":[68],"overcome":[70],"problem":[72],"by":[73,98],"adding":[74],"few":[76],"simple":[77],"macro":[78,93,165,181],"actions,":[79,166],"actions":[80,94],"that":[81,158],"extended":[83],"time":[85,90],"more":[87,105,112],"than":[88],"step.":[91],"improve":[95],"making":[99,109],"search":[100],"state":[103],"space":[104],"effective":[106],"thereby":[108],"predictable":[113],"agent.":[117],"have":[122],"considered":[123],"mating":[126,188],"task,":[127],"which":[128],"first":[131],"step":[132],"towards":[133],"our":[134],"aim":[135],"perform":[137],"embodied":[138],"evolution,":[139],"where":[140],"evolutionary":[142],"selection":[143],"process":[144],"an":[146],"integrated":[147],"part":[148],"task.":[151],"We":[152],"show,":[153],"simulation":[155,195],"hardware,":[157],"case":[161],"without":[164],"fail":[169],"learn":[171,185],"meaningful":[173],"behavior.":[174],"contrast,":[176],"action":[182],"good":[187],"reasonable":[191],"time,":[192],"both":[194],"hardware.":[197]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
