{"id":"https://openalex.org/W4298128015","doi":"https://doi.org/10.1109/ro-man53752.2022.9900738","title":"A Sample Efficiency Improved Method via Hierarchical Reinforcement Learning Networks","display_name":"A Sample Efficiency Improved Method via Hierarchical Reinforcement Learning Networks","publication_year":2022,"publication_date":"2022-08-29","ids":{"openalex":"https://openalex.org/W4298128015","doi":"https://doi.org/10.1109/ro-man53752.2022.9900738"},"language":"en","primary_location":{"id":"doi:10.1109/ro-man53752.2022.9900738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ro-man53752.2022.9900738","pdf_url":null,"source":{"id":"https://openalex.org/S4363607995","display_name":"2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100745143","display_name":"Qinghua Chen","orcid":"https://orcid.org/0000-0002-4726-3276"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]},{"id":"https://openalex.org/I23171815","display_name":"Zhengzhou University of Light Industry","ror":"https://ror.org/05fwr8z16","country_code":"CN","type":"education","lineage":["https://openalex.org/I23171815"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Qinghua Chen","raw_affiliation_strings":["Oakland University,Intelligent Robotics Laboratory,MI,USA,48309","College of Electrical and Information Engineering, Zhengzhou University of Light Industry, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Intelligent Robotics Laboratory,MI,USA,48309","institution_ids":["https://openalex.org/I177721651","https://openalex.org/I1343180700"]},{"raw_affiliation_string":"College of Electrical and Information Engineering, Zhengzhou University of Light Industry, Henan, China","institution_ids":["https://openalex.org/I23171815"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071823834","display_name":"Evan Dallas","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Evan Dallas","raw_affiliation_strings":["Oakland University,Intelligent Robotics Laboratory,MI,USA,48309"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Intelligent Robotics Laboratory,MI,USA,48309","institution_ids":["https://openalex.org/I177721651","https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087364462","display_name":"Pourya Shahverdi","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pourya Shahverdi","raw_affiliation_strings":["Oakland University,Intelligent Robotics Laboratory,MI,USA,48309"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Intelligent Robotics Laboratory,MI,USA,48309","institution_ids":["https://openalex.org/I177721651","https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003274227","display_name":"Jessica Korneder","orcid":null},"institutions":[{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jessica Korneder","raw_affiliation_strings":["Oakland University,Applied Behavior Analysis Clinic,MI,USA,48309"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Applied Behavior Analysis Clinic,MI,USA,48309","institution_ids":["https://openalex.org/I177721651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048406290","display_name":"Osamah Rawashdeh","orcid":null},"institutions":[{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]},{"id":"https://openalex.org/I4210145666","display_name":"Embedded Systems (United States)","ror":"https://ror.org/04742eh45","country_code":"US","type":"company","lineage":["https://openalex.org/I4210145666"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Osamah A. Rawashdeh","raw_affiliation_strings":["Oakland University,Embedded Systems Research Lab,MI,USA,48309"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Embedded Systems Research Lab,MI,USA,48309","institution_ids":["https://openalex.org/I4210145666","https://openalex.org/I177721651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088460626","display_name":"Wing-Yue Geoffrey Louie","orcid":"https://orcid.org/0000-0002-2742-6947"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I177721651","display_name":"Oakland University","ror":"https://ror.org/01ythxj32","country_code":"US","type":"education","lineage":["https://openalex.org/I177721651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wing-Yue Geoffrey Louie","raw_affiliation_strings":["Oakland University,Intelligent Robotics Laboratory,MI,USA,48309"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oakland University,Intelligent Robotics Laboratory,MI,USA,48309","institution_ids":["https://openalex.org/I177721651","https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5191,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.63890803,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1498","last_page":"1505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9434000253677368,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7995254993438721},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7716368436813354},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7095137238502502},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6150903105735779},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5853754281997681},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5771809816360474},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5719988942146301},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.553617000579834},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4253222346305847},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07218968868255615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7995254993438721},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7716368436813354},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7095137238502502},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6150903105735779},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5853754281997681},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5771809816360474},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5719988942146301},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.553617000579834},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4253222346305847},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07218968868255615},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ro-man53752.2022.9900738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ro-man53752.2022.9900738","pdf_url":null,"source":{"id":"https://openalex.org/S4363607995","display_name":"2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 31st IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1965568826","https://openalex.org/W2201581102","https://openalex.org/W2413397042","https://openalex.org/W2563524613","https://openalex.org/W2772721022","https://openalex.org/W2788862220","https://openalex.org/W2803616302","https://openalex.org/W2884282566","https://openalex.org/W2903242674","https://openalex.org/W2963073614","https://openalex.org/W2963079702","https://openalex.org/W2963099939","https://openalex.org/W2967355195","https://openalex.org/W2982852993","https://openalex.org/W2991355586","https://openalex.org/W2994446013","https://openalex.org/W3034971196","https://openalex.org/W3044051321","https://openalex.org/W3104029641","https://openalex.org/W3123212791","https://openalex.org/W3130292943","https://openalex.org/W3157722160","https://openalex.org/W3167624337","https://openalex.org/W3171801597","https://openalex.org/W3173978695","https://openalex.org/W3190355545","https://openalex.org/W3193304579","https://openalex.org/W3202439292","https://openalex.org/W3212911600","https://openalex.org/W4287324990","https://openalex.org/W4287756016","https://openalex.org/W4300799055","https://openalex.org/W4320013936","https://openalex.org/W6687681856","https://openalex.org/W6729966448","https://openalex.org/W6740801417","https://openalex.org/W6746809867","https://openalex.org/W6751285671","https://openalex.org/W6753554323","https://openalex.org/W6771270455","https://openalex.org/W6780032859","https://openalex.org/W6780404908","https://openalex.org/W6785329447","https://openalex.org/W6788500030","https://openalex.org/W6790689592","https://openalex.org/W6796992754","https://openalex.org/W6803890735"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W4306904969","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2094012830","https://openalex.org/W2967461658"],"abstract_inverted_index":{"Learning":[0],"from":[1,46,84,123],"demonstration":[2,85,152],"(LfD)":[3],"approaches":[4,30,86,142],"have":[5,25],"garnered":[6],"significant":[7,34],"interest":[8],"for":[9,38,56,116,143],"teaching":[10],"social":[11,58,144],"robots":[12,145],"a":[13,33,39,43,57,62,74,129],"variety":[14],"of":[15,36,81],"tasks":[16,122,148],"in":[17,66,149],"healthcare,":[18],"educational,":[19],"and":[20,94,157],"service":[21],"domains":[22,150],"after":[23],"they":[24],"been":[26],"deployed.":[27],"These":[28],"LfD":[29,141],"often":[31],"require":[32],"number":[35],"demonstrations":[37,55],"robot":[40,59],"to":[41,52,60,76,135,139,146],"learn":[42,61,147],"performant":[44],"model":[45],"task":[47,63],"demonstrations.":[48],"However,":[49],"requiring":[50],"non-experts":[51],"provide":[53],"numerous":[54],"is":[64,114,154],"impractical":[65],"real-world":[67],"applications.":[68],"In":[69,125],"this":[70],"paper,":[71],"we":[72],"propose":[73],"method":[75],"improve":[77,136],"the":[78,126,133],"sample":[79,118],"efficiency":[80,119],"existing":[82],"learning":[83,121],"via":[87],"data":[88,153],"augmentation,":[89],"dynamic":[90],"experience":[91],"replay":[92],"sizes,":[93],"hierarchical":[95,112],"Deep":[96],"Q-Networks":[97],"(DQN).":[98],"After":[99],"validating":[100],"our":[101,110,137],"methods":[102],"on":[103],"two":[104],"different":[105],"datasets,":[106],"results":[107],"suggest":[108],"that":[109],"proposed":[111],"DQN":[113],"effective":[115],"improving":[117],"when":[120],"demonstration.":[124],"future,":[127],"such":[128],"sample-efficient":[130],"approach":[131],"has":[132],"potential":[134],"ability":[138],"apply":[140],"where":[151],"limited,":[155],"sparse,":[156],"imbalanced.":[158]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}