{"id":"https://openalex.org/W3089491416","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207302","title":"Adaptive Inner-reward Shaping in Sparse Reward Games","display_name":"Adaptive Inner-reward Shaping in Sparse Reward Games","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3089491416","doi":"https://doi.org/10.1109/ijcnn48605.2020.9207302","mag":"3089491416"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn48605.2020.9207302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101582325","display_name":"Dong Yang","orcid":"https://orcid.org/0000-0001-5294-4634"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Yang","raw_affiliation_strings":["HPCL College of Computer, National University of Defense Technology, Chang Sha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HPCL College of Computer, National University of Defense Technology, Chang Sha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065371644","display_name":"Yuhua Tang","orcid":"https://orcid.org/0000-0002-4956-3379"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhua Tang","raw_affiliation_strings":["HPCL College of Computer, National University of Defense Technology, Chang Sha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HPCL College of Computer, National University of Defense Technology, Chang Sha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9750999808311462,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8824667930603027},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6583563685417175},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5451764464378357},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.47831252217292786},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4190694987773895},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41901981830596924}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8824667930603027},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6583563685417175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5451764464378357},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47831252217292786},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4190694987773895},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41901981830596924},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn48605.2020.9207302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn48605.2020.9207302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W142858861","https://openalex.org/W172298727","https://openalex.org/W1191599655","https://openalex.org/W1499408472","https://openalex.org/W1522301498","https://openalex.org/W1575592356","https://openalex.org/W1757796397","https://openalex.org/W1777239053","https://openalex.org/W1977655452","https://openalex.org/W2045031658","https://openalex.org/W2088595989","https://openalex.org/W2121863487","https://openalex.org/W2145339207","https://openalex.org/W2159600763","https://openalex.org/W2159880874","https://openalex.org/W2170899200","https://openalex.org/W2257979135","https://openalex.org/W2342840547","https://openalex.org/W2626429629","https://openalex.org/W2736601468","https://openalex.org/W2885550588","https://openalex.org/W2904246096","https://openalex.org/W2905173465","https://openalex.org/W2945317616","https://openalex.org/W2952281591","https://openalex.org/W2963243556","https://openalex.org/W2963523627","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2997502221","https://openalex.org/W4234438384","https://openalex.org/W4298857966","https://openalex.org/W6605846256","https://openalex.org/W6607097208","https://openalex.org/W6627932998","https://openalex.org/W6631190155","https://openalex.org/W6634413486","https://openalex.org/W6637967152","https://openalex.org/W6638088447","https://openalex.org/W6683408100","https://openalex.org/W6683908939","https://openalex.org/W6692846177","https://openalex.org/W6704559304","https://openalex.org/W6741002519","https://openalex.org/W6753925943","https://openalex.org/W6756871163","https://openalex.org/W6757592117","https://openalex.org/W6762729159","https://openalex.org/W6765749921"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,5,52,87,116,158,189,238],"focuses":[2],"on":[3,16],"goal-directed":[4],"from":[6],"interaction":[7],"and":[8,25,54,88,112,226,244,246],"the":[9,19,23,39,77,100,114,137,155,162,169,215,237,249],"success":[10],"of":[11,65,72,85,196,217,242],"its":[12],"applications":[13],"strongly":[14],"depends":[15],"how":[17,26,141],"well":[18,27],"reward":[20,89,102,107,126,167],"signal":[21,92],"frames":[22],"problem":[24,103],"it":[28],"assesses":[29],"progress":[30],"in":[31,35,93,95,149,164,174,180,206,240],"solving":[32],"it.":[33],"But":[34],"many":[36],"real-world":[37],"scenarios,":[38],"agent":[40,115,138,250],"is":[41,62,172],"supplied":[42],"with":[43,119,154,191,221],"extremely":[44],"sparse":[45,101,166],"or":[46,199],"even":[47],"no":[48],"rewards":[49,73,145],"which":[50,208],"makes":[51],"fail":[53],"fall":[55],"into":[56],"ineffective":[57],"exploration.":[58],"In":[59],"psychology,":[60],"shaping":[61,127],"a":[63,91,106,175,185],"method":[64,235],"animal":[66,86],"training":[67],"by":[68,82,104],"reinforcing":[69],"successive":[70],"approximations":[71],"to":[74,109,139,142,146,251,256],"finally":[75],"achieve":[76],"desired":[78],"complex":[79,165],"behavior.":[80],"Inspired":[81],"this":[83,96],"phenomenon":[84],"as":[90],"neuroscience,":[94],"paper":[97],"we":[98],"solve":[99],"constructing":[105],"generator":[108],"generate":[110,143],"inner-rewards":[111,219],"guide":[113,147],"control":[117],"policies":[118],"deep":[120],"neural":[121],"networks.":[122],"The":[123],"proposed":[124,170],"learning-based":[125],"does":[128],"not":[129,210],"require":[130],"specific":[131],"domain":[132],"knowledge,":[133],"but":[134],"rather":[135],"enable":[136],"learn":[140,252],"inner":[144],"itself":[148],"any":[150],"scenarios":[151],"online":[152],"jointly":[153],"actual":[156],"reinforcement":[157,188,223],"process.":[159],"To":[160],"validate":[161],"performance":[163,216,239],"problems,":[168],"approach":[171,220],"evaluated":[173],"challenging":[176],"scenario,":[177],"Football":[178,183],"Academy":[179],"Google":[181],"Research":[182],"Environment,":[184],"newly":[186],"released":[187],"environment":[190],"physics-based":[192],"3D":[193],"simulator,":[194],"instead":[195],"maze":[197],"environments":[198],"grid":[200],"world":[201],"that":[202,233],"are":[203,209],"commonly":[204],"used":[205],"research":[207],"sufficiently":[211],"challenging.":[212],"We":[213],"compare":[214],"our":[218,234],"two":[222],"algorithms":[224],"(PPO":[225],"ICM":[227],"+":[228],"PPO).":[229],"Experimental":[230],"results":[231],"show":[232],"improves":[236],"terms":[241],"speed":[243],"quality,":[245],"also":[247],"enables":[248],"generalized":[253],"skills":[254],"applied":[255],"novel":[257],"scenarios.":[258]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}