{"id":"https://openalex.org/W4415241134","doi":"https://doi.org/10.3390/computers14100434","title":"Curiosity-Driven Exploration in Reinforcement Learning: An Adaptive Self-Supervised Learning Approach for Playing Action Games","display_name":"Curiosity-Driven Exploration in Reinforcement Learning: An Adaptive Self-Supervised Learning Approach for Playing Action Games","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W4415241134","doi":"https://doi.org/10.3390/computers14100434"},"language":"en","primary_location":{"id":"doi:10.3390/computers14100434","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14100434","pdf_url":"https://www.mdpi.com/2073-431X/14/10/434/pdf?version=1760495233","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2073-431X/14/10/434/pdf?version=1760495233","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041431752","display_name":"Sehar Shahzad Farooq","orcid":"https://orcid.org/0000-0002-2571-9121"},"institutions":[{"id":"https://openalex.org/I189442560","display_name":"Gyeongsang National University","ror":"https://ror.org/00saywf64","country_code":"KR","type":"education","lineage":["https://openalex.org/I189442560"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sehar Shahzad Farooq","raw_affiliation_strings":["School of Aerospace Engineering, Department of Control and Robot Engineering, Gyeongsang National University, Jinju 52828, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"School of Aerospace Engineering, Department of Control and Robot Engineering, Gyeongsang National University, Jinju 52828, Republic of Korea","institution_ids":["https://openalex.org/I189442560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101787006","display_name":"Hameedur Rahman","orcid":"https://orcid.org/0000-0001-8892-9911"},"institutions":[{"id":"https://openalex.org/I899713450","display_name":"Air University","ror":"https://ror.org/03yfe9v83","country_code":"PK","type":"education","lineage":["https://openalex.org/I899713450"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Hameedur Rahman","raw_affiliation_strings":["Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069918865","display_name":"Samiya Abdul Wahid","orcid":null},"institutions":[{"id":"https://openalex.org/I899713450","display_name":"Air University","ror":"https://ror.org/03yfe9v83","country_code":"PK","type":"education","lineage":["https://openalex.org/I899713450"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Samiya Abdul Wahid","raw_affiliation_strings":["Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","Department of Psychology, Faculty of Social Sciences, Air University, Islamabad 44000, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]},{"raw_affiliation_string":"Department of Psychology, Faculty of Social Sciences, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055424665","display_name":"M. A. Ansari","orcid":"https://orcid.org/0009-0006-3414-9112"},"institutions":[{"id":"https://openalex.org/I899713450","display_name":"Air University","ror":"https://ror.org/03yfe9v83","country_code":"PK","type":"education","lineage":["https://openalex.org/I899713450"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Muhammad Alyan Ansari","raw_affiliation_strings":["Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108172193","display_name":"Saira Abdul Wahid","orcid":null},"institutions":[{"id":"https://openalex.org/I899713450","display_name":"Air University","ror":"https://ror.org/03yfe9v83","country_code":"PK","type":"education","lineage":["https://openalex.org/I899713450"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Saira Abdul Wahid","raw_affiliation_strings":["Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","Department of Psychology, Faculty of Social Sciences, Air University, Islamabad 44000, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Games Development, Faculty of Computing and AI, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]},{"raw_affiliation_string":"Department of Psychology, Faculty of Social Sciences, Air University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I899713450"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043134790","display_name":"Hosu Lee","orcid":"https://orcid.org/0000-0002-8702-5993"},"institutions":[{"id":"https://openalex.org/I189442560","display_name":"Gyeongsang National University","ror":"https://ror.org/00saywf64","country_code":"KR","type":"education","lineage":["https://openalex.org/I189442560"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hosu Lee","raw_affiliation_strings":["School of Aerospace Engineering, Department of Control and Robot Engineering, Gyeongsang National University, Jinju 52828, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"School of Aerospace Engineering, Department of Control and Robot Engineering, Gyeongsang National University, Jinju 52828, Republic of Korea","institution_ids":["https://openalex.org/I189442560"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043134790"],"corresponding_institution_ids":["https://openalex.org/I189442560"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":5.1026,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95635673,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"14","issue":"10","first_page":"434","last_page":"434"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9520999789237976,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14308","display_name":"Psychological and Educational Research Studies","score":0.9492999911308289,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8194000124931335},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6168000102043152},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5789999961853027},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.578499972820282},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5759000182151794},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5418000221252441},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.48249998688697815},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.3919000029563904}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8194000124931335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7574999928474426},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6168000102043152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5963000059127808},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5789999961853027},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.578499972820282},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5759000182151794},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5418000221252441},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.48249998688697815},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4693000018596649},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40849998593330383},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C3018412434","wikidata":"https://www.wikidata.org/wiki/Q7889","display_name":"Video game","level":2,"score":0.375},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2791999876499176},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.2572000026702881}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/computers14100434","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14100434","pdf_url":"https://www.mdpi.com/2073-431X/14/10/434/pdf?version=1760495233","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:b5324edfa5494c20a476323e4f3f4ab7","is_oa":true,"landing_page_url":"https://doaj.org/article/b5324edfa5494c20a476323e4f3f4ab7","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computers, Vol 14, Iss 10, p 434 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computers14100434","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14100434","pdf_url":"https://www.mdpi.com/2073-431X/14/10/434/pdf?version=1760495233","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3130070122","display_name":null,"funder_award_id":"MOTIE","funder_id":"https://openalex.org/F4320335199","funder_display_name":"Korea Institute of Energy Technology Evaluation and Planning"},{"id":"https://openalex.org/G5092055710","display_name":null,"funder_award_id":"000000","funder_id":"https://openalex.org/F4320321681","funder_display_name":"Ministry of Trade, Industry and Energy"},{"id":"https://openalex.org/G7121727950","display_name":null,"funder_award_id":"KETEP)","funder_id":"https://openalex.org/F4320335199","funder_display_name":"Korea Institute of Energy Technology Evaluation and Planning"},{"id":"https://openalex.org/G992484961","display_name":null,"funder_award_id":"Korea","funder_id":"https://openalex.org/F4320321681","funder_display_name":"Ministry of Trade, Industry and Energy"}],"funders":[{"id":"https://openalex.org/F4320321318","display_name":"Gyeongsang National University","ror":"https://ror.org/00saywf64"},{"id":"https://openalex.org/F4320321681","display_name":"Ministry of Trade, Industry and Energy","ror":"https://ror.org/008nkqk13"},{"id":"https://openalex.org/F4320335199","display_name":"Korea Institute of Energy Technology Evaluation and Planning","ror":"https://ror.org/02zq38y32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415241134.pdf","grobid_xml":"https://content.openalex.org/works/W4415241134.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W2002196558","https://openalex.org/W2020920737","https://openalex.org/W2034806191","https://openalex.org/W2069797667","https://openalex.org/W2161966552","https://openalex.org/W2765830379","https://openalex.org/W2808378895","https://openalex.org/W2894358134","https://openalex.org/W2896473780","https://openalex.org/W2904578482","https://openalex.org/W2962693466","https://openalex.org/W2962730405","https://openalex.org/W2963305465","https://openalex.org/W2963523627","https://openalex.org/W2999850024","https://openalex.org/W3000251863","https://openalex.org/W3092563415","https://openalex.org/W3094607870","https://openalex.org/W3107575247","https://openalex.org/W3108875650","https://openalex.org/W3116831358","https://openalex.org/W3118881636","https://openalex.org/W3131479975","https://openalex.org/W3136629921","https://openalex.org/W3176730514","https://openalex.org/W3213774989","https://openalex.org/W4283786485","https://openalex.org/W4302424597","https://openalex.org/W4303422666","https://openalex.org/W4321636951","https://openalex.org/W4362735644","https://openalex.org/W4367298969","https://openalex.org/W4377695283","https://openalex.org/W4381890308","https://openalex.org/W4394816255","https://openalex.org/W4403296091","https://openalex.org/W4407741524","https://openalex.org/W4410426567","https://openalex.org/W4413479706"],"related_works":[],"abstract_inverted_index":{"Games":[0],"are":[1,135],"considered":[2],"a":[3],"suitable":[4],"and":[5,21,39,70,86,112,147,166,194,212,225,236],"standard":[6],"benchmark":[7],"for":[8,117,233],"checking":[9],"the":[10,23,34,40,93,100,125,154,176,216],"performance":[11,24],"of":[12,18,25,33,218],"artificial":[13],"intelligence-based":[14],"algorithms":[15],"in":[16,55,61,89,158,172,207,227,239],"terms":[17],"training,":[19],"evaluating,":[20],"comparing":[22],"AI":[26],"agents.":[27],"In":[28],"this":[29,73],"research,":[30],"an":[31,191],"application":[32],"Intrinsic":[35],"Curiosity":[36],"Module":[37],"(ICM)":[38],"Asynchronous":[41],"Advantage":[42],"Actor\u2013Critic":[43],"(A3C)":[44],"algorithm":[45,130],"is":[46,64,122],"explored":[47],"using":[48,124],"action":[49,62,90,173,220,228],"games.":[50,91,221],"Having":[51],"been":[52],"proven":[53],"successful":[54],"several":[56],"gaming":[57],"environments,":[58,102],"its":[59],"effectiveness":[60],"games":[63,229],"rarely":[65],"explored.":[66],"Providing":[67],"efficient":[68,237],"learning":[69,88,178,195,235],"adaptation":[71],"facilities,":[72],"research":[74,202],"aims":[75],"to":[76,106,198,204],"assess":[77],"whether":[78],"integrating":[79],"ICM":[80,143,165,224],"with":[81,99,131,137,144,175],"A3C":[82,120,167,226],"promotes":[83],"curiosity-driven":[84,170,205],"explorations":[85],"adaptive":[87,234],"Using":[92],"MAME":[94],"Toolkit":[95],"library,":[96],"we":[97,152,188],"interface":[98],"game":[101,104,115],"preprocess":[103],"screens":[105],"focus":[107],"on":[108,184],"relevant":[109],"visual":[110],"elements,":[111],"create":[113],"diverse":[114],"episodes":[116],"training.":[118],"The":[119,161],"policy":[121],"optimized":[123],"Proximal":[126],"Policy":[127],"Optimization":[128],"(PPO)":[129],"tuned":[132],"hyperparameters.":[133],"Comparisons":[134],"made":[136],"baseline":[138,199],"methods,":[139],"including":[140],"vanilla":[141],"A3C,":[142],"pixel-based":[145],"predictions,":[146],"state-of-the-art":[148],"exploration":[149,171,179,206,217,238],"techniques.":[150],"Additionally,":[151],"evaluate":[153],"agent\u2019s":[155],"generalization":[156],"capability":[157],"separate":[159],"environments.":[160,242],"results":[162],"demonstrate":[163],"that":[164],"effectively":[168],"promote":[169],"games,":[174],"agent":[177],"behaviors":[180],"without":[181],"relying":[182],"solely":[183],"external":[185],"rewards.":[186],"Notably,":[187],"also":[189],"observed":[190],"improved":[192],"efficiency":[193],"speed":[196],"compared":[197],"approaches.":[200],"This":[201],"contributes":[203],"reinforcement":[208],"learning-based":[209],"virtual":[210],"environments":[211],"provides":[213],"insights":[214],"into":[215],"complex":[219],"Successfully":[222],"applying":[223],"presents":[230],"exciting":[231],"opportunities":[232],"challenging":[240],"real-world":[241]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2025-10-16T00:00:00"}
