{"id":"https://openalex.org/W4391361641","doi":"https://doi.org/10.3390/a17020060","title":"Learning State-Specific Action Masks for Reinforcement Learning","display_name":"Learning State-Specific Action Masks for Reinforcement Learning","publication_year":2024,"publication_date":"2024-01-30","ids":{"openalex":"https://openalex.org/W4391361641","doi":"https://doi.org/10.3390/a17020060"},"language":"en","primary_location":{"id":"doi:10.3390/a17020060","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a17020060","pdf_url":"https://www.mdpi.com/1999-4893/17/2/60/pdf?version=1706614577","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/17/2/60/pdf?version=1706614577","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100441388","display_name":"Ziyi Wang","orcid":"https://orcid.org/0000-0003-3982-080X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyi Wang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381216","display_name":"Xinran Li","orcid":"https://orcid.org/0000-0001-9951-253X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinran Li","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101298618","display_name":"Luoyang Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Luoyang Sun","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100399691","display_name":"Haifeng Zhang","orcid":"https://orcid.org/0000-0003-2670-6055"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haifeng Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","Nanjing Artificial Intelligence Research of IA, Jiangning District, Nanjing 211135, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Nanjing Artificial Intelligence Research of IA, Jiangning District, Nanjing 211135, China","institution_ids":[]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103120152","display_name":"Hualin Liu","orcid":"https://orcid.org/0000-0001-9110-6297"},"institutions":[{"id":"https://openalex.org/I98227222","display_name":"China National Petroleum Corporation (China)","ror":"https://ror.org/05269d038","country_code":"CN","type":"company","lineage":["https://openalex.org/I98227222"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hualin Liu","raw_affiliation_strings":["Key Laboratory of Oil & Gas Business Chain Optimization, Petrochina Planning and Engineering Institute, CNPC, Beijing 100083, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Oil & Gas Business Chain Optimization, Petrochina Planning and Engineering Institute, CNPC, Beijing 100083, China","institution_ids":["https://openalex.org/I98227222"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100384727","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0002-4021-4228"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["Computer Science, University College London, London WC1E 6BT, UK"],"affiliations":[{"raw_affiliation_string":"Computer Science, University College London, London WC1E 6BT, UK","institution_ids":["https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100399691"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210100255","https://openalex.org/I4210165038"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":3.0976,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91982841,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"17","issue":"2","first_page":"60","last_page":"60"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6948790550231934},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5771399736404419},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5304654836654663},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.49593672156333923},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43862223625183105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39532384276390076},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.22366061806678772},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.14145004749298096},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09012386202812195}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6948790550231934},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5771399736404419},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5304654836654663},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.49593672156333923},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43862223625183105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39532384276390076},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22366061806678772},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.14145004749298096},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09012386202812195},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a17020060","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a17020060","pdf_url":"https://www.mdpi.com/1999-4893/17/2/60/pdf?version=1706614577","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10188413","is_oa":true,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10188413/","pdf_url":"https://discovery.ucl.ac.uk/10188413/1/algorithms-17-00060.pdf","source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   Algorithms , 17  (2)    , Article 60. (2024)      ","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:99937593c1314cebacb8dfc93b9c358f","is_oa":true,"landing_page_url":"https://doaj.org/article/99937593c1314cebacb8dfc93b9c358f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 17, Iss 2, p 60 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a17020060","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a17020060","pdf_url":"https://www.mdpi.com/1999-4893/17/2/60/pdf?version=1706614577","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391361641.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1673310716","https://openalex.org/W2058735307","https://openalex.org/W2087232315","https://openalex.org/W2480004914","https://openalex.org/W2787666871","https://openalex.org/W2914921993","https://openalex.org/W2919045150","https://openalex.org/W2963150697","https://openalex.org/W2982316857","https://openalex.org/W3015437096","https://openalex.org/W3087845791","https://openalex.org/W3093210455","https://openalex.org/W3094236223","https://openalex.org/W3101483449","https://openalex.org/W3107951310","https://openalex.org/W3110876951","https://openalex.org/W3123348991","https://openalex.org/W3146607090","https://openalex.org/W3167724086","https://openalex.org/W4296992075","https://openalex.org/W4310580318","https://openalex.org/W4312300737","https://openalex.org/W4382202881","https://openalex.org/W4383108874","https://openalex.org/W4385248029","https://openalex.org/W6712173889","https://openalex.org/W6784390008","https://openalex.org/W6785535465"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2076061571","https://openalex.org/W2106552856","https://openalex.org/W1987513656","https://openalex.org/W2072376847","https://openalex.org/W2089013912"],"abstract_inverted_index":{"Efficient":[0],"yet":[1],"sufficient":[2],"exploration":[3],"remains":[4],"a":[5,33,62,87,113,136,148,204],"critical":[6],"challenge":[7],"in":[8,71,207],"reinforcement":[9],"learning":[10,150,210],"(RL),":[11],"especially":[12],"for":[13,65,152],"Markov":[14],"Decision":[15],"Processes":[16],"(MDPs)":[17],"with":[18,73,86,93,103],"vast":[19],"action":[20,30,39,44,69,75],"spaces.":[21],"Previous":[22],"approaches":[23],"have":[24],"commonly":[25],"involved":[26],"projecting":[27],"the":[28,43,68,97,108,126,132,154,208,218],"original":[29],"space":[31,35,70],"into":[32,185],"latent":[34],"or":[36,52],"employing":[37],"environmental":[38],"masks":[40,85],"to":[41,124,140,169,174,179],"reduce":[42],"possibilities.":[45],"Nevertheless,":[46],"these":[47],"methods":[48],"often":[49],"lack":[50],"interpretability":[51],"rely":[53],"on":[54,96,119],"expert":[55],"knowledge.":[56],"In":[57],"this":[58],"study,":[59],"we":[60,111,146],"introduce":[61,112],"novel":[63,114],"method":[64,166],"automatically":[66],"reducing":[67],"environments":[72],"discrete":[74],"spaces":[76],"while":[77],"preserving":[78],"interpretability.":[79],"The":[80],"proposed":[81],"approach":[82],"learns":[83],"state-specific":[84],"dual":[88],"purpose:":[89],"(1)":[90],"eliminating":[91],"actions":[92,102,130],"minimal":[94],"influence":[95],"MDP":[98,133],"and":[99,134,172,178,191,201,212],"(2)":[100],"aggregating":[101],"identical":[104],"behavioral":[105,127],"consequences":[106,128],"within":[107,131],"MDP.":[109],"Specifically,":[110],"concept":[115],"called":[116],"Bisimulation":[117],"Metrics":[118],"Actions":[120],"by":[121,161,217],"States":[122],"(BMAS)":[123],"quantify":[125],"of":[129],"design":[135],"dedicated":[137],"mask":[138,155],"model":[139],"ensure":[141],"their":[142],"binary":[143],"nature.":[144],"Crucially,":[145],"present":[147],"practical":[149],"procedure":[151],"training":[153],"model,":[156],"leveraging":[157],"transition":[158],"data":[159],"collected":[160],"any":[162],"RL":[163,176,188,209],"policy.":[164],"Our":[165],"is":[167,193],"designed":[168],"be":[170],"plug-and-play":[171],"adaptable":[173],"all":[175],"policies,":[177],"validate":[180],"its":[181],"effectiveness,":[182],"an":[183],"integration":[184],"two":[186],"prominent":[187],"algorithms,":[189],"DQN":[190],"PPO,":[192],"performed.":[194],"Experimental":[195],"results":[196],"obtained":[197],"from":[198],"Maze,":[199],"Atari,":[200],"\u03bcRTS2":[202],"reveal":[203],"substantial":[205],"acceleration":[206],"process":[211],"noteworthy":[213],"performance":[214],"improvements":[215],"facilitated":[216],"introduced":[219],"approach.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
