{"id":"https://openalex.org/W4400581613","doi":"https://doi.org/10.3390/s24144513","title":"Generalization Enhancement of Visual Reinforcement Learning through Internal States","display_name":"Generalization Enhancement of Visual Reinforcement Learning through Internal States","publication_year":2024,"publication_date":"2024-07-12","ids":{"openalex":"https://openalex.org/W4400581613","doi":"https://doi.org/10.3390/s24144513","pmid":"https://pubmed.ncbi.nlm.nih.gov/39065911"},"language":"en","primary_location":{"id":"doi:10.3390/s24144513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s24144513","pdf_url":"https://www.mdpi.com/1424-8220/24/14/4513/pdf?version=1720778578","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1424-8220/24/14/4513/pdf?version=1720778578","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101291103","display_name":"Hanlin Yang","orcid":"https://orcid.org/0009-0006-2132-9763"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanlin Yang","raw_affiliation_strings":["Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China, Chengdu 611731, China"],"raw_orcid":"https://orcid.org/0009-0000-3875-7971","affiliations":[{"raw_affiliation_string":"Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China, Chengdu 611731, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078911343","display_name":"William Zhu","orcid":"https://orcid.org/0000-0001-8898-9244"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"William Zhu","raw_affiliation_strings":["Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China, Chengdu 611731, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China, Chengdu 611731, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061478689","display_name":"Xianchao Zhu","orcid":"https://orcid.org/0000-0002-7148-7923"},"institutions":[{"id":"https://openalex.org/I36152291","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55","country_code":"CN","type":"education","lineage":["https://openalex.org/I36152291"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xianchao Zhu","raw_affiliation_strings":["School of Artificial Intelligence and Big Data, Henan University of Technology, Zhengzhou 450001, China"],"raw_orcid":"https://orcid.org/0000-0002-7148-7923","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Big Data, Henan University of Technology, Zhengzhou 450001, China","institution_ids":["https://openalex.org/I36152291"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061478689"],"corresponding_institution_ids":["https://openalex.org/I36152291"],"apc_list":{"value":2400,"currency":"CHF","value_usd":2598},"apc_paid":{"value":2400,"currency":"CHF","value_usd":2598},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08379962,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"14","first_page":"4513","last_page":"4513"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8314282298088074},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.8061301708221436},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6064224243164062},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5314378142356873},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4541371464729309},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4499835968017578},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4334448575973511},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4125733971595764},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.410990834236145},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10550761222839355}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8314282298088074},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.8061301708221436},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6064224243164062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5314378142356873},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4541371464729309},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4499835968017578},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4334448575973511},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4125733971595764},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.410990834236145},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10550761222839355},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/s24144513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s24144513","pdf_url":"https://www.mdpi.com/1424-8220/24/14/4513/pdf?version=1720778578","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},{"id":"pmid:39065911","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39065911","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors (Basel, Switzerland)","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11280822","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11280822","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11280822/pdf/sensors-24-04513.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors (Basel)","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:c934e9cc58dc4fbca62fe9e2e7fb63a2","is_oa":false,"landing_page_url":"https://doaj.org/article/c934e9cc58dc4fbca62fe9e2e7fb63a2","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors, Vol 24, Iss 14, p 4513 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/s24144513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s24144513","pdf_url":"https://www.mdpi.com/1424-8220/24/14/4513/pdf?version=1720778578","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6200000047683716}],"awards":[{"id":"https://openalex.org/G129400711","display_name":null,"funder_award_id":"31401529","funder_id":"https://openalex.org/F4320321955","funder_display_name":"Henan University of Technology"}],"funders":[{"id":"https://openalex.org/F4320321955","display_name":"Henan University of Technology","ror":"https://ror.org/05sbgwt55"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400581613.pdf","grobid_xml":"https://content.openalex.org/works/W4400581613.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W2076337359","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2145339207","https://openalex.org/W2605102758","https://openalex.org/W2618530766","https://openalex.org/W2781726626","https://openalex.org/W2808849637","https://openalex.org/W2903181768","https://openalex.org/W2949736877","https://openalex.org/W2952440204","https://openalex.org/W2962887844","https://openalex.org/W2967681060","https://openalex.org/W2981344907","https://openalex.org/W3021708257","https://openalex.org/W3035682985","https://openalex.org/W3049203137","https://openalex.org/W3107359931","https://openalex.org/W3173518742","https://openalex.org/W4394755367","https://openalex.org/W4395010887","https://openalex.org/W4395666233","https://openalex.org/W4396937861","https://openalex.org/W4398141862","https://openalex.org/W6669402789","https://openalex.org/W6687483927","https://openalex.org/W6761443123","https://openalex.org/W6765092683","https://openalex.org/W6776867236"],"related_works":["https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2501594388","https://openalex.org/W4297821278"],"abstract_inverted_index":{"Visual":[0],"reinforcement":[1,24,101,172,242],"learning":[2,25,102,167,173,243],"is":[3,26,44,130,203,253],"important":[4],"in":[5,22,52,99,233,278],"various":[6],"practical":[7],"applications,":[8],"such":[9],"as":[10],"video":[11],"games,":[12],"robotic":[13],"manipulation,":[14],"and":[15,65,112,129,148,169,185,236,260,276],"autonomous":[16],"navigation.":[17],"However,":[18],"a":[19,77,83,109,113,240],"major":[20],"challenge":[21],"visual":[23,100,171,209,231],"the":[27,48,116,120,125,134,138,145,165,176,179,189,194,197,200,213,221,224,230,234,256,261],"generalization":[28,98,274],"to":[29,96,122,132,151,188,205],"unseen":[30,40],"environments,":[31],"that":[32,211,269],"is,":[33],"how":[34],"agents":[35],"manage":[36],"environments":[37,184],"with":[38,58,183,245,263],"previously":[39],"backgrounds.":[41],"This":[42,80],"issue":[43],"triggered":[45],"mainly":[46],"by":[47],"high":[49,214],"unpredictability":[50,215],"inherent":[51],"high-dimensional":[53,217],"observation":[54,218],"space.":[55,219],"To":[56],"deal":[57],"this":[59],"problem,":[60],"techniques":[61],"including":[62],"domain":[63],"randomization":[64],"data":[66],"augmentation":[67],"have":[68],"been":[69],"explored;":[70],"nevertheless,":[71],"these":[72],"methods":[73],"still":[74],"cannot":[75],"attain":[76],"satisfactory":[78],"result.":[79],"paper":[81],"proposes":[82],"new":[84],"method":[85,105,158,252,271],"named":[86],"Internal":[87],"States":[88],"Simulation":[89],"Auxiliary":[90],"(ISSA),":[91],"which":[92],"uses":[93],"internal":[94,127],"states":[95,128],"improve":[97],"tasks.":[103,282],"Our":[104],"contains":[106],"two":[107,163],"agents,":[108],"teacher":[110,117,146,180,198],"agent":[111,118,140,147,181,202,226,244],"student":[114,135,139,191,201,225],"agent:":[115],"has":[119],"ability":[121,275],"directly":[123],"access":[124],"environment's":[126],"used":[131],"facilitate":[133],"agent's":[136],"training;":[137],"receives":[141],"initial":[142],"guidance":[143,195],"from":[144,229],"subsequently":[149],"continues":[150],"learn":[152],"independently.":[153],"From":[154],"another":[155],"perspective,":[156],"our":[157,251,270],"can":[159],"be":[160],"divided":[161],"into":[162],"phases,":[164],"transfer":[166],"phase":[168],"traditional":[170],"phase.":[174],"In":[175,220],"first":[177],"phase,":[178,223],"interacts":[182],"imparts":[186],"knowledge":[187],"vision-based":[190,241],"agent.":[192],"With":[193],"of":[196,216,250],"agent,":[199],"able":[204],"discover":[206],"more":[207],"effective":[208],"representations":[210],"address":[212],"next":[222],"autonomously":[227],"learns":[228],"information":[232],"environment,":[235],"ultimately,":[237],"it":[238],"becomes":[239],"enhanced":[246],"generalization.":[247],"The":[248],"effectiveness":[249],"evaluated":[254],"using":[255],"DMControl":[257],"Generalization":[258],"Benchmark":[259],"DrawerWorld":[262],"texture":[264],"distortions.":[265],"Preliminary":[266],"results":[267],"indicate":[268],"significantly":[272],"improves":[273],"performance":[277],"complex":[279],"continuous":[280],"control":[281]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
