{"id":"https://openalex.org/W2905567628","doi":"https://doi.org/10.1609/aaai.v33i01.33014975","title":"Composable Modular Reinforcement Learning","display_name":"Composable Modular Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2905567628","doi":"https://doi.org/10.1609/aaai.v33i01.33014975","mag":"2905567628"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33014975","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014975","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4428/4306","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4428/4306","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077935717","display_name":"Christopher Simpkins","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Christopher Simpkins","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091212723","display_name":"Charles L. Isbell","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charles Isbell","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5077935717"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":2.099,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.89871465,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"33","issue":"01","first_page":"4975","last_page":"4982"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9229999780654907,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8805739283561707},{"id":"https://openalex.org/keywords/composability","display_name":"Composability","score":0.8747155070304871},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.8649142980575562},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6927093267440796},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.597262442111969},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.430105984210968},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.42848747968673706},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4209517240524292},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37981680035591125},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1665022075176239},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1617511510848999}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8805739283561707},{"id":"https://openalex.org/C2778814252","wikidata":"https://www.wikidata.org/wiki/Q5156715","display_name":"Composability","level":2,"score":0.8747155070304871},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.8649142980575562},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6927093267440796},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.597262442111969},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.430105984210968},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.42848747968673706},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4209517240524292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37981680035591125},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1665022075176239},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1617511510848999},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v33i01.33014975","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014975","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4428/4306","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33014975","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014975","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4428/4306","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2905567628.pdf","grobid_xml":"https://content.openalex.org/works/W2905567628.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W72400652","https://openalex.org/W171213352","https://openalex.org/W186800770","https://openalex.org/W1480676279","https://openalex.org/W1488730473","https://openalex.org/W1492974543","https://openalex.org/W1532534028","https://openalex.org/W1533729189","https://openalex.org/W1543060329","https://openalex.org/W1585861384","https://openalex.org/W1600813180","https://openalex.org/W1661137052","https://openalex.org/W1801398035","https://openalex.org/W1839010533","https://openalex.org/W1965568826","https://openalex.org/W1994075200","https://openalex.org/W1998560962","https://openalex.org/W2015014750","https://openalex.org/W2017580728","https://openalex.org/W2026536622","https://openalex.org/W2033133808","https://openalex.org/W2036910311","https://openalex.org/W2061562262","https://openalex.org/W2089561656","https://openalex.org/W2103582821","https://openalex.org/W2106216496","https://openalex.org/W2109910161","https://openalex.org/W2121517924","https://openalex.org/W2122458975","https://openalex.org/W2136202932","https://openalex.org/W2145739724","https://openalex.org/W2147538364","https://openalex.org/W2155844728","https://openalex.org/W2158548602","https://openalex.org/W2294020159","https://openalex.org/W2335959470","https://openalex.org/W2566555774","https://openalex.org/W2791775562","https://openalex.org/W2963262099","https://openalex.org/W3139377883","https://openalex.org/W4230906437","https://openalex.org/W6631933356","https://openalex.org/W6634945376","https://openalex.org/W6636993184","https://openalex.org/W6649975682","https://openalex.org/W6654448428","https://openalex.org/W6675771355","https://openalex.org/W6678304456","https://openalex.org/W6681672201","https://openalex.org/W6681865777","https://openalex.org/W6683124071","https://openalex.org/W6683443546","https://openalex.org/W6792155000","https://openalex.org/W6805740779"],"related_works":["https://openalex.org/W2383601311","https://openalex.org/W2388607015","https://openalex.org/W186132510","https://openalex.org/W2542453320","https://openalex.org/W2630947271","https://openalex.org/W2159423485","https://openalex.org/W2162232804","https://openalex.org/W1974408264","https://openalex.org/W2484376704","https://openalex.org/W4251089699"],"abstract_inverted_index":{"Modular":[0],"reinforcement":[1,35,53,92],"learning":[2,36,54,93],"(MRL)":[3],"decomposes":[4],"a":[5,13,113],"monolithic":[6],"multiple-goal":[7],"problem":[8,111],"into":[9,42],"modules":[10,49,73,85],"that":[11,62,82,120],"solve":[12,109],"portion":[14],"of":[15,46,59],"the":[16,27,31,57,72,125],"original":[17],"problem.":[18],"The":[19],"modules\u2019":[20],"action":[21,28],"preferences":[22,65],"are":[23],"arbitrated":[24],"to":[25,100,103,132],"determine":[26],"taken":[29],"by":[30],"agent.":[32],"Truly":[33],"modular":[34,52,91],"would":[37],"support":[38],"not":[39,123],"only":[40],"decomposition":[41],"modules,":[43],"but":[44],"composability":[45],"separately":[47,83],"written":[48,84],"in":[50,89],"new":[51,90],"agents.":[55],"However,":[56],"performance":[58,79,127],"MRL":[60],"agents":[61,94],"arbitrate":[63],"module":[64],"using":[66],"additive":[67],"reward":[68,76,106],"schemes":[69],"degrades":[70],"when":[71],"have":[74],"incomparable":[75],"scales.":[77,107],"This":[78],"degradation":[80,128],"means":[81],"cannot":[86],"be":[87,101],"composed":[88],"as-is":[95],"\u2013":[96],"they":[97],"may":[98],"need":[99],"modified":[102],"align":[104],"their":[105],"We":[108],"this":[110],"with":[112],"Q-learningbased":[114],"command":[115],"arbitration":[116],"algorithm":[117],"and":[118],"demonstrate":[119],"it":[121],"does":[122],"exhibit":[124],"same":[126],"as":[129],"existing":[130],"approaches":[131],"MRL,":[133],"thereby":[134],"supporting":[135],"composability.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
