{"id":"https://openalex.org/W2933058008","doi":"https://doi.org/10.1109/bigcomp.2019.8679366","title":"Accelerating Deep Reinforcement Learning Using Human Demonstration Data Based on Dual Replay Buffer Management and Online Frame Skipping","display_name":"Accelerating Deep Reinforcement Learning Using Human Demonstration Data Based on Dual Replay Buffer Management and Online Frame Skipping","publication_year":2019,"publication_date":"2019-02-01","ids":{"openalex":"https://openalex.org/W2933058008","doi":"https://doi.org/10.1109/bigcomp.2019.8679366","mag":"2933058008"},"language":"en","primary_location":{"id":"doi:10.1109/bigcomp.2019.8679366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigcomp.2019.8679366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data and Smart Computing (BigComp)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021331300","display_name":"Sangho Yeo","orcid":"https://orcid.org/0000-0002-9194-7552"},"institutions":[{"id":"https://openalex.org/I57664883","display_name":"Ajou University","ror":"https://ror.org/03tzb2h73","country_code":"KR","type":"education","lineage":["https://openalex.org/I57664883"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Sangho Yeo","raw_affiliation_strings":["Dept. of Computer Engineering, Ajou University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Engineering, Ajou University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I57664883"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003540648","display_name":"Sangyoon Oh","orcid":"https://orcid.org/0000-0001-5854-149X"},"institutions":[{"id":"https://openalex.org/I57664883","display_name":"Ajou University","ror":"https://ror.org/03tzb2h73","country_code":"KR","type":"education","lineage":["https://openalex.org/I57664883"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sangyoon Oh","raw_affiliation_strings":["Dept. of Computer Engineering, Ajou University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Engineering, Ajou University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I57664883"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101411024","display_name":"Minsu Lee","orcid":"https://orcid.org/0000-0001-9017-6998"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minsu Lee","raw_affiliation_strings":["Institute of Computer Technology, Seoul National University, Seoul, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Technology, Seoul National University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021331300"],"corresponding_institution_ids":["https://openalex.org/I57664883"],"apc_list":null,"apc_paid":null,"fwci":0.42,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.69667939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11197","display_name":"Digital Games and Media","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8859903812408447},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8215957880020142},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6033579707145691},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.460782915353775},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32076185941696167}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8859903812408447},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8215957880020142},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6033579707145691},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.460782915353775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32076185941696167},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigcomp.2019.8679366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigcomp.2019.8679366","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data and Smart Computing (BigComp)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W2145339207","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2342840547","https://openalex.org/W2402219803","https://openalex.org/W2591957724","https://openalex.org/W2592915494","https://openalex.org/W2594103415","https://openalex.org/W2619240030","https://openalex.org/W2741122588","https://openalex.org/W2775795276","https://openalex.org/W2786928559","https://openalex.org/W2788862220","https://openalex.org/W2949561945","https://openalex.org/W2950462959","https://openalex.org/W2963296584","https://openalex.org/W2963363446","https://openalex.org/W2963864421","https://openalex.org/W2963871073","https://openalex.org/W2964043796","https://openalex.org/W3103780890","https://openalex.org/W4297824719","https://openalex.org/W4298857966","https://openalex.org/W4299851364","https://openalex.org/W6692846177"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Human":[0],"demonstration":[1,41,55,72,101],"data":[2,42,56,73,102,165,184],"plays":[3],"an":[4,114,136],"important":[5],"role":[6],"in":[7,228,239],"the":[8,17,147,150,161,199,219,224,236,240],"early":[9],"stage":[10],"of":[11,34,52,70,109,163,193,231,251],"deep":[12,85],"reinforcement":[13,25,36,86,225],"learning":[14,26,37,65,87,226],"to":[15,28,140,156,182,246],"accelerate":[16],"training":[18,148],"process":[19],"as":[20,22],"well":[21],"guiding":[23],"a":[24,49,83,90,110,119,248],"agent":[27],"learn":[29],"complicated":[30],"policy.":[31],"However,":[32],"most":[33,63],"current":[35],"approaches":[38],"with":[39,89,129,209],"human":[40,54,100,111,144,157,164,186],"and":[43,57,95,118,175,198],"reward":[44],"assumes":[45],"that":[46,58,202,243],"there":[47],"is":[48,59,74,153,166,244],"sufficient":[50],"amount":[51,69],"high-quality":[53],"not":[60],"true":[61],"for":[62,99],"real-world":[64],"cases":[66],"where":[67,160],"enough":[68],"experts'":[71],"always":[75],"limited.":[76],"To":[77],"overcome":[78],"this":[79],"limitation,":[80],"we":[81],"propose":[82,135],"novel":[84],"approach":[88],"dual":[91,105,210],"replay":[92,106,112,116,120,127,158,187,211],"buffer":[93,107,159],"management":[94],"online":[96,137,169,206],"frame":[97,138,151,207],"skipping":[98,139,152,208],"sampling.":[103],"The":[104],"consists":[108],"memory,":[113,117],"actor":[115],"manager.":[121],"And":[122],"it":[123],"can":[124],"manage":[125],"two":[126,205],"buffers":[128],"independent":[130],"sampling":[131],"policies.":[132],"We":[133,189],"also":[134],"fully":[141],"utilize":[142],"available":[143],"data.":[145],"During":[146],"period,":[149],"performed":[154],"dynamically":[155],"all":[162],"stored.":[167],"Two":[168],"frame-skipping,":[170],"namely,":[171],"FS-ER(Frame":[172],"Skipping-Experience":[173,178],"Replay)":[174,179],"DFS-ER(Dynamic":[176],"Frame":[177],"are":[180],"used":[181],"sample":[183],"from":[185],"buffer.":[188],"conducted":[190],"empirical":[191],"experiments":[192],"four":[194,232],"popular":[195],"Atari":[196],"games":[197],"results":[200],"show":[201],"our":[203],"proposed":[204],"memory":[212],"outperforms":[213],"existing":[214],"baselines.":[215],"Specifically,":[216],"DFS-ER":[217],"shows":[218,235],"fastest":[220],"score":[221],"increment":[222],"during":[223],"procedure":[227],"three":[229],"out":[230],"experiments.":[233],"FS-ER":[234],"best":[237],"performance":[238],"other":[241],"environment":[242],"hard":[245],"train":[247],"model":[249],"because":[250],"sparse":[252],"reward.":[253]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}