{"id":"https://openalex.org/W3090658167","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207473","title":"Automatic Policy Decomposition through Abstract State Space Dynamic Specialization","display_name":"Automatic Policy Decomposition through Abstract State Space Dynamic Specialization","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3090658167","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207473","mag":"3090658167"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9207473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207473","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030341346","display_name":"Rene Sturgeon","orcid":null},"institutions":[{"id":"https://openalex.org/I51768193","display_name":"Royal Military College of Canada","ror":"https://ror.org/04yr71909","country_code":"CA","type":"education","lineage":["https://openalex.org/I51768193"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Rene Sturgeon","raw_affiliation_strings":["Mathematics and Computer Science, Royal Military College of Canada, Kingston, Canada"],"affiliations":[{"raw_affiliation_string":"Mathematics and Computer Science, Royal Military College of Canada, Kingston, Canada","institution_ids":["https://openalex.org/I51768193"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085688780","display_name":"Fran\u00e7ois Rivest","orcid":"https://orcid.org/0000-0003-2038-5174"},"institutions":[{"id":"https://openalex.org/I51768193","display_name":"Royal Military College of Canada","ror":"https://ror.org/04yr71909","country_code":"CA","type":"education","lineage":["https://openalex.org/I51768193"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Francois Rivest","raw_affiliation_strings":["Mathematics and Computer Science, Royal Military College of Canada, Kingston, Canada"],"affiliations":[{"raw_affiliation_string":"Mathematics and Computer Science, Royal Military College of Canada, Kingston, Canada","institution_ids":["https://openalex.org/I51768193"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030341346"],"corresponding_institution_ids":["https://openalex.org/I51768193"],"apc_list":null,"apc_paid":null,"fwci":0.1326,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55013663,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":"33","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9688000082969666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8569637537002563},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.728949785232544},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7271686792373657},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.6239044666290283},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5644851326942444},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5579984188079834},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5552298426628113},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5363816618919373},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5214157104492188},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5004582405090332},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.48391538858413696},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.47703880071640015},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4503423869609833},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3695734739303589},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.2536908984184265},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14992231130599976},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13097751140594482}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8569637537002563},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.728949785232544},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7271686792373657},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.6239044666290283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5644851326942444},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5579984188079834},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5552298426628113},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5363816618919373},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5214157104492188},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5004582405090332},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48391538858413696},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.47703880071640015},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4503423869609833},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3695734739303589},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2536908984184265},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14992231130599976},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13097751140594482},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9207473","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207473","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1533861849","https://openalex.org/W2109910161","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2173564293","https://openalex.org/W2267881773","https://openalex.org/W2583993537","https://openalex.org/W2624731731","https://openalex.org/W2736506089","https://openalex.org/W2790924949","https://openalex.org/W2950892788","https://openalex.org/W2951799221","https://openalex.org/W2963142324","https://openalex.org/W2963262099","https://openalex.org/W2963488340","https://openalex.org/W2963615220","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2964227312","https://openalex.org/W3100944043","https://openalex.org/W3103780890","https://openalex.org/W6631190155","https://openalex.org/W6631943919","https://openalex.org/W6685444567","https://openalex.org/W6692846177"],"related_works":["https://openalex.org/W2808418668","https://openalex.org/W3105579180","https://openalex.org/W4287865573","https://openalex.org/W2807018115","https://openalex.org/W2051622126","https://openalex.org/W2025663273","https://openalex.org/W3007324819","https://openalex.org/W2290470984","https://openalex.org/W1574958246","https://openalex.org/W2104958792"],"abstract_inverted_index":{"Significant":[0],"progress":[1],"has":[2,78],"been":[3],"made":[4],"recently":[5],"in":[6,10,18,43,108],"deep":[7,94],"reinforcement":[8],"learning":[9,19,109],"the":[11,44,50,56,66,69,100,110,117,123,143,150,176],"development":[12],"of":[13,23,30,80,112,116,133],"options.":[14],"This":[15],"idea":[16],"consists":[17],"policies":[20],"(or":[21],"macro":[22],"actions)":[24],"for":[25,55,126],"sub-goals.":[26],"An":[27],"important":[28],"bottleneck":[29],"this":[31,60,157],"approach":[32,158],"is":[33,72,136,142,159],"that":[34,68,127,156],"these":[35],"options":[36,166],"are":[37],"often":[38],"available":[39],"as":[40,120,122],"actions":[41],"everywhere":[42],"state":[45,70,118,130],"space,":[46],"hence,":[47],"potentially":[48],"enlarging":[49],"action":[51],"space":[52,71,119],"to":[53,64,138,149,161,165,175],"search":[54],"optimal":[57,124],"policy.":[58,153],"In":[59],"paper,":[61],"we":[62],"propose":[63],"use":[65],"fact":[67],"rarely":[73],"fully":[74],"connected,":[75],"but":[76],"instead":[77],"regions":[79],"highly":[81],"connected":[82],"states":[83],"with":[84],"fewer":[85],"links":[86],"between":[87],"those":[88],"regions.":[89],"Our":[90],"proposed":[91],"model":[92],"extends":[93],"Q-Learning":[95],"network":[96],"(DQN)":[97],"by":[98],"splitting":[99],"top":[101],"layers":[102],"into":[103],"multiple":[104],"heads":[105],"each":[106,134],"specializing":[107],"dynamics":[111],"a":[113,172],"particular":[114],"region":[115],"well":[121],"policy":[125],"region.":[128],"The":[129],"prediction":[131],"quality":[132],"head":[135,141],"used":[137],"determine":[139],"which":[140],"local":[144],"expert,":[145],"rating":[146],"its":[147],"contribution":[148],"current":[151,177],"state's":[152],"We":[154],"show":[155],"able":[160],"learn":[162],"something":[163],"similar":[164],"and":[167],"generalized":[168],"value":[169],"function,":[170],"providing":[171],"promising":[173],"alternative":[174],"approach.":[178]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
