{"id":"https://openalex.org/W2921315777","doi":"https://doi.org/10.1109/istel.2018.8661047","title":"Constructing and Evaluating Options in Reinforcement Learning","display_name":"Constructing and Evaluating Options in Reinforcement Learning","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2921315777","doi":"https://doi.org/10.1109/istel.2018.8661047","mag":"2921315777"},"language":"en","primary_location":{"id":"doi:10.1109/istel.2018.8661047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/istel.2018.8661047","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 9th International Symposium on Telecommunications (IST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052996172","display_name":"Marzieh Davoodabadi Farahani","orcid":null},"institutions":[{"id":"https://openalex.org/I67009956","display_name":"Iran University of Science and Technology","ror":"https://ror.org/01jw2p796","country_code":"IR","type":"education","lineage":["https://openalex.org/I67009956"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Marzieh Davoodabadi Farahani","raw_affiliation_strings":["Computer Engineering Department, Iran University of Science and Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, Iran University of Science and Technology, Tehran, Iran","institution_ids":["https://openalex.org/I67009956"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088710433","display_name":"Nasser Mozayani","orcid":"https://orcid.org/0000-0001-9477-9657"},"institutions":[{"id":"https://openalex.org/I67009956","display_name":"Iran University of Science and Technology","ror":"https://ror.org/01jw2p796","country_code":"IR","type":"education","lineage":["https://openalex.org/I67009956"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Nasser Mozayani","raw_affiliation_strings":["Computer Engineering Department, Iran University of Science and Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, Iran University of Science and Technology, Tehran, Iran","institution_ids":["https://openalex.org/I67009956"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5052996172"],"corresponding_institution_ids":["https://openalex.org/I67009956"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.19037547,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"183","last_page":"186"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9670000076293945,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.908015251159668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7503645420074463},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7031255960464478},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5968111157417297},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5262830853462219},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4974234402179718},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.48404139280319214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4499163329601288},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2815778851509094},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06993570923805237}],"concepts":[{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.908015251159668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7503645420074463},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7031255960464478},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5968111157417297},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5262830853462219},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4974234402179718},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.48404139280319214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4499163329601288},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2815778851509094},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06993570923805237},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/istel.2018.8661047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/istel.2018.8661047","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 9th International Symposium on Telecommunications (IST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W73694616","https://openalex.org/W163581396","https://openalex.org/W1547361108","https://openalex.org/W1586162706","https://openalex.org/W1592847719","https://openalex.org/W2082973084","https://openalex.org/W2095293504","https://openalex.org/W2109910161","https://openalex.org/W2111020392","https://openalex.org/W2121863487","https://openalex.org/W2130791707","https://openalex.org/W2132202037","https://openalex.org/W2161252410","https://openalex.org/W2400452236","https://openalex.org/W2531640463","https://openalex.org/W4214717370","https://openalex.org/W4249441547","https://openalex.org/W6606734847","https://openalex.org/W6677116005","https://openalex.org/W6712689629","https://openalex.org/W6728441391"],"related_works":["https://openalex.org/W2030816003","https://openalex.org/W4239992647","https://openalex.org/W2076325756","https://openalex.org/W81423522","https://openalex.org/W2150013480","https://openalex.org/W2488264085","https://openalex.org/W1509860481","https://openalex.org/W1989362889","https://openalex.org/W4386206750","https://openalex.org/W1605713622"],"abstract_inverted_index":{"In":[0,16,70],"this":[1,113],"paper,":[2],"we":[3,72],"propose":[4,100],"a":[5,43,84,87,101,123],"new":[6,44],"subgoal":[7],"based":[8],"method":[9,102,114],"for":[10,60,91,103],"automatic":[11],"construction":[12],"of":[13,26,28,53,79,86],"useful":[14,80],"options.":[15],"our":[17],"proposed":[18],"method,":[19],"subgoals":[20],"are":[21,58,110],"considered":[22],"as":[23],"border":[24],"states":[25],"communities":[27],"the":[29,39,54,62],"transition":[30,55],"graph":[31],"created":[32],"after":[33,128],"some":[34],"initial":[35],"agent":[36,63,118],"interactions":[37],"with":[38,112],"environment.":[40],"We":[41,98],"present":[42],"community":[45,66],"detection":[46],"algorithm":[47],"to":[48,67,74],"provide":[49],"an":[50],"appropriate":[51],"partitioning":[52],"graph.":[56],"Macro-actions":[57],"constructed":[59],"taking":[61],"from":[64,117],"one":[65],"other":[68],"communities.":[69],"addition,":[71],"attempt":[73],"capture":[75],"intuitions":[76],"about":[77],"features":[78],"macro-actions.":[81,130],"There":[82],"is":[83],"lack":[85],"generic":[88],"evaluation":[89],"mechanism":[90],"evaluating":[92,104],"each":[93,105],"macro-action":[94,106],"in":[95,126],"previous":[96],"research.":[97],"will":[99],"separately.":[107],"Inappropriate":[108],"macro-actions":[109],"identified":[111],"and":[115],"discarded":[116],"choices.":[119],"Experimental":[120],"results":[121,127],"show":[122],"significant":[124],"improvement":[125],"pruning":[129]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
