{"id":"https://openalex.org/W4312458941","doi":"https://doi.org/10.1109/iros47612.2022.9981607","title":"Learning Skills to Navigate without a Master: A Sequential Multi-Policy Reinforcement Learning Algorithm","display_name":"Learning Skills to Navigate without a Master: A Sequential Multi-Policy Reinforcement Learning Algorithm","publication_year":2022,"publication_date":"2022-10-23","ids":{"openalex":"https://openalex.org/W4312458941","doi":"https://doi.org/10.1109/iros47612.2022.9981607"},"language":"en","primary_location":{"id":"doi:10.1109/iros47612.2022.9981607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981607","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010292297","display_name":"Ambedkar Dukkipati","orcid":"https://orcid.org/0000-0002-6352-6283"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ambedkar Dukkipati","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101701536","display_name":"Rajarshi Banerjee","orcid":"https://orcid.org/0000-0002-2693-0439"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajarshi Banerjee","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017304883","display_name":"Ranga Shaarad Ayyagari","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ranga Shaarad Ayyagari","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101693163","display_name":"Dhaval Parmar","orcid":"https://orcid.org/0000-0002-2823-8083"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Dhaval Parmar Udaybhai","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010292297"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.5194,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.64291101,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":"550","issue":null,"first_page":"2483","last_page":"2489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9639999866485596,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8742297887802124},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7988408803939819},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.679581880569458},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5995630025863647},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.518052339553833},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4682793617248535},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.42636987566947937}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8742297887802124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7988408803939819},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.679581880569458},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5995630025863647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.518052339553833},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4682793617248535},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.42636987566947937},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros47612.2022.9981607","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9981607","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W64088143","https://openalex.org/W2109910161","https://openalex.org/W2121517924","https://openalex.org/W2151834591","https://openalex.org/W2156256170","https://openalex.org/W2158548602","https://openalex.org/W2160371091","https://openalex.org/W2256970469","https://openalex.org/W2296073425","https://openalex.org/W2530849036","https://openalex.org/W2749807327","https://openalex.org/W2766447205","https://openalex.org/W2781726626","https://openalex.org/W2892515961","https://openalex.org/W2904246096","https://openalex.org/W2949267040","https://openalex.org/W2949604932","https://openalex.org/W2950614095","https://openalex.org/W2963161674","https://openalex.org/W2963544079","https://openalex.org/W2964227312","https://openalex.org/W2967452881","https://openalex.org/W2970427641","https://openalex.org/W2995636097","https://openalex.org/W3037620198","https://openalex.org/W4298857966","https://openalex.org/W4300799055","https://openalex.org/W4301501993","https://openalex.org/W4319988532","https://openalex.org/W6637967152","https://openalex.org/W6683443546","https://openalex.org/W6683821272","https://openalex.org/W6692492022","https://openalex.org/W6734215269","https://openalex.org/W6740801417","https://openalex.org/W6743756900","https://openalex.org/W6744935223","https://openalex.org/W6747473740","https://openalex.org/W6752089545","https://openalex.org/W6757592117","https://openalex.org/W6759871227","https://openalex.org/W6761212738","https://openalex.org/W6763240116","https://openalex.org/W6766861245","https://openalex.org/W6772008794","https://openalex.org/W6779728822","https://openalex.org/W6849896277","https://openalex.org/W6903351479"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W3162204513","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548"],"abstract_inverted_index":{"Solving":[0],"complex":[1],"problems":[2],"using":[3,86],"reinforcement":[4],"learning":[5,15,38,109],"necessitates":[6],"breaking":[7],"down":[8],"the":[9,96,105,114,134,176,182],"problem":[10],"into":[11,67],"manageable":[12],"tasks,":[13],"and":[14,141,168,181],"policies":[16,39],"to":[17,26,52,80,112,123],"solve":[18],"these":[19],"tasks.":[20],"These":[21],"policies,":[22],"in":[23,48,95,144],"turn,":[24],"have":[25,153],"be":[27],"controlled":[28],"by":[29],"a":[30,68,101,108,145],"master":[31],"policy":[32,70],"that":[33,151,158],"takes":[34],"high-level":[35],"decisions.":[36],"Hence":[37],"involves":[40],"hierarchical":[41,89],"decision":[42],"structures.":[43],"However,":[44],"training":[45],"such":[46,82,164],"methods":[47,163],"practice":[49],"may":[50],"lead":[51],"poor":[53],"generalization,":[54],"with":[55],"either":[56],"sub-policies":[57],"executing":[58],"actions":[59],"for":[60,116],"too":[61],"few":[62],"time":[63],"steps":[64],"or":[65],"devolving":[66],"single":[69],"altogether.":[71],"In":[72],"our":[73,124,137,159],"work,":[74],"we":[75,152],"introduce":[76],"an":[77,87],"alternative":[78],"approach":[79,138],"learn":[81],"skills":[83],"sequentially":[84],"without":[85],"overarching":[88],"policy.":[90],"We":[91,121,132,155],"propose":[92],"this":[93],"method":[94,126,160],"context":[97],"of":[98,104,107,136],"environments":[99],"where":[100],"major":[102],"component":[103],"objective":[106],"agent":[110],"is":[111],"prolong":[113],"episode":[115],"as":[117,119,127,165],"long":[118],"possible.":[120],"refer":[122],"proposed":[125],"Sequential":[128],"Soft":[129,166,169],"Option":[130,170],"Critic.":[131],"demonstrate":[133],"utility":[135],"on":[139,172],"navigation":[140,149],"goal-based":[142],"tasks":[143],"flexible":[146],"simulated":[147],"3D":[148],"environment":[150,180],"developed.":[154],"also":[156],"show":[157],"outperforms":[161],"prior":[162],"Actor-Critic":[167],"Critic":[171],"various":[173],"environments,":[174],"including":[175],"Atari":[177],"River":[178],"Raid":[179],"Gym-Duckietown":[183],"self-driving":[184],"car":[185],"simulator.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
