{"id":"https://openalex.org/W3127391635","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534023","title":"Improving Model-Based Reinforcement Learning with Internal State Representations through Self-Supervision","display_name":"Improving Model-Based Reinforcement Learning with Internal State Representations through Self-Supervision","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3127391635","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534023","mag":"3127391635"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9534023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2102.05599","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051978528","display_name":"Julien Scholz","orcid":null},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Julien Scholz","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany","University of Hamburg,Knowledge Technology,Department of Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"University of Hamburg,Knowledge Technology,Department of Informatics,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102025003","display_name":"Cornelius Weber","orcid":"https://orcid.org/0000-0001-5163-938X"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cornelius Weber","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany","University of Hamburg,Knowledge Technology,Department of Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"University of Hamburg,Knowledge Technology,Department of Informatics,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042086036","display_name":"Muhammad Burhan Hafez","orcid":"https://orcid.org/0000-0003-1670-8962"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Muhammad Burhan Hafez","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany","University of Hamburg,Knowledge Technology,Department of Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"University of Hamburg,Knowledge Technology,Department of Informatics,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["Knowledge Technology, University of Hamburg, Germany","University of Hamburg,Knowledge Technology,Department of Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Knowledge Technology, University of Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"University of Hamburg,Knowledge Technology,Department of Informatics,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5051978528"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02005777,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11674","display_name":"Sports Analytics and Performance","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8679171800613403},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.7404462695121765},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7223438024520874},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6549524664878845},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5896187424659729},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.568055272102356},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5256955623626709},{"id":"https://openalex.org/keywords/internal-model","display_name":"Internal model","score":0.5083078742027283},{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.4543636441230774},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.42021340131759644},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.41907787322998047},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3544972240924835},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.15209472179412842},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.061259835958480835}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8679171800613403},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.7404462695121765},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7223438024520874},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6549524664878845},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5896187424659729},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.568055272102356},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5256955623626709},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.5083078742027283},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.4543636441230774},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.42021340131759644},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.41907787322998047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3544972240924835},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.15209472179412842},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.061259835958480835},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9534023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.soton.ac.uk:495937","is_oa":false,"landing_page_url":"http://doi.org/10.1109/IJCNN52387.2021.9534023>).","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:arXiv.org:2102.05599","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2102.05599","pdf_url":"https://arxiv.org/pdf/2102.05599","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3127391635","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2102.05599.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2102.05599","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2102.05599","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2102.05599","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2102.05599","pdf_url":"https://arxiv.org/pdf/2102.05599","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G6276684567","display_name":null,"funder_award_id":"TRR 169","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1714211023","https://openalex.org/W1968962398","https://openalex.org/W2004915807","https://openalex.org/W2154997814","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2417089653","https://openalex.org/W2766447205","https://openalex.org/W2772709170","https://openalex.org/W2790924949","https://openalex.org/W2795756076","https://openalex.org/W2900152462","https://openalex.org/W2902125520","https://openalex.org/W2964227892","https://openalex.org/W2978069508","https://openalex.org/W3032077725","https://openalex.org/W3118210634","https://openalex.org/W4230431917","https://openalex.org/W6631190155","https://openalex.org/W6637441126","https://openalex.org/W6692846177","https://openalex.org/W6716653466","https://openalex.org/W6746177919","https://openalex.org/W6750409132","https://openalex.org/W6756256016","https://openalex.org/W6756908582","https://openalex.org/W6769166761","https://openalex.org/W6780559895","https://openalex.org/W6788072495"],"related_works":["https://openalex.org/W3199069166","https://openalex.org/W3041890730","https://openalex.org/W2977416770","https://openalex.org/W2790924949","https://openalex.org/W2294805292","https://openalex.org/W3202251623","https://openalex.org/W2999490157","https://openalex.org/W2158150115","https://openalex.org/W298069310","https://openalex.org/W2968284168","https://openalex.org/W57282082","https://openalex.org/W3151079898","https://openalex.org/W2908390575","https://openalex.org/W3010862467","https://openalex.org/W3167355872","https://openalex.org/W3201617237","https://openalex.org/W1492428185","https://openalex.org/W2535652371","https://openalex.org/W1573930840","https://openalex.org/W2950197980"],"abstract_inverted_index":{"Using":[0],"a":[1,92,97,132,157],"model":[2,38,94,125],"of":[3,102,123],"the":[4,33,36,45,53,78,85,113,149],"environment,":[5],"reinforcement":[6],"learning":[7,114],"agents":[8],"can":[9,39,151],"plan":[10],"their":[11],"future":[12],"moves":[13],"and":[14,24,96,106,127],"achieve":[15],"superhuman":[16],"performance":[17,134],"in":[18,136],"board":[19],"games":[20],"like":[21],"Chess,":[22],"Shogi,":[23],"Go,":[25],"while":[26,51],"remaining":[27],"relatively":[28],"sample-efficient.":[29],"As":[30],"demonstrated":[31],"by":[32],"MuZero":[34,60],"Algorithm,":[35],"environment":[37,68,86,154],"even":[40],"be":[41],"learned":[42],"dynamically,":[43],"generalizing":[44],"agent":[46],"to":[47,84,111],"many":[48],"more":[49],"tasks":[50],"at":[52],"same":[54],"time":[55],"achieving":[56],"state-of-the-art":[57],"performance.":[58],"Notably,":[59],"uses":[61],"internal":[62,81],"state":[63,82,87],"representations":[64],"derived":[65],"from":[66],"real":[67],"states":[69],"for":[70,146],"its":[71],"predictions.":[72],"In":[73],"this":[74,120],"paper,":[75],"we":[76],"bind":[77],"model's":[79],"predicted":[80],"representation":[83],"via":[88],"two":[89],"additional":[90],"terms:":[91],"reconstruction":[93,124],"loss":[95,126,130],"simpler":[98,128],"consistency":[99,129],"loss,":[100],"both":[101],"which":[103],"work":[104],"independently":[105],"unsupervised,":[107],"acting":[108],"as":[109],"constraints":[110],"stabilize":[112],"process.":[115],"Our":[116,140],"experiments":[117],"show":[118],"that":[119],"new":[121],"integration":[122],"provide":[131],"significant":[133],"increase":[135],"OpenAI":[137],"Gym":[138],"environments.":[139],"modifications":[141],"also":[142],"enable":[143],"self-supervised":[144],"pretraining":[145],"MuZero,":[147],"so":[148],"algorithm":[150],"learn":[152],"about":[153],"dynamics":[155],"before":[156],"goal":[158],"is":[159],"made":[160],"available.":[161]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
