{"id":"https://openalex.org/W2790924949","doi":"https://doi.org/10.1109/lra.2018.2800101","title":"Integrating State Representation Learning Into Deep Reinforcement Learning","display_name":"Integrating State Representation Learning Into Deep Reinforcement Learning","publication_year":2018,"publication_date":"2018-01-31","ids":{"openalex":"https://openalex.org/W2790924949","doi":"https://doi.org/10.1109/lra.2018.2800101","mag":"2790924949"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2018.2800101","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2800101","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033347648","display_name":"Tim de Bruin","orcid":"https://orcid.org/0000-0003-1741-0850"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Tim de Bruin","raw_affiliation_strings":["Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-1741-0850","affiliations":[{"raw_affiliation_string":"Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035229829","display_name":"Jens Kober","orcid":"https://orcid.org/0000-0001-7257-5434"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jens Kober","raw_affiliation_strings":["Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0001-7257-5434","affiliations":[{"raw_affiliation_string":"Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008547992","display_name":"Karl Tuyls","orcid":"https://orcid.org/0000-0001-7929-1944"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]},{"id":"https://openalex.org/I4210113297","display_name":"Google (United Kingdom)","ror":"https://ror.org/024bc3e07","country_code":"GB","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210113297","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Karl Tuyls","raw_affiliation_strings":["Google Deepmind, London, U.K","University of Liverpool, Liverpool, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Deepmind, London, U.K","institution_ids":["https://openalex.org/I4210113297"]},{"raw_affiliation_string":"University of Liverpool, Liverpool, U.K","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084264842","display_name":"Robert Babu\u0161ka","orcid":"https://orcid.org/0000-0001-9578-8598"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Robert Babuska","raw_affiliation_strings":["Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0001-9578-8598","affiliations":[{"raw_affiliation_string":"Cognitive Robotics Department, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5033347648"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":10.8301,"has_fulltext":false,"cited_by_count":118,"citation_normalized_percentile":{"value":0.98536233,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"3","issue":"3","first_page":"1394","last_page":"1401"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9722999930381775,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9710999727249146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9222247004508972},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.7462800741195679},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6816152334213257},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6097513437271118},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5147040486335754},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.47336074709892273},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4732949137687683},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.4719739854335785}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9222247004508972},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.7462800741195679},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6816152334213257},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6097513437271118},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5147040486335754},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.47336074709892273},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4732949137687683},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.4719739854335785},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lra.2018.2800101","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2018.2800101","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},{"id":"pmh:oai:tudelft.nl:uuid:adbec839-4b4b-4877-9143-09346b560249","is_oa":false,"landing_page_url":"http://resolver.tudelft.nl/uuid:adbec839-4b4b-4877-9143-09346b560249","pdf_url":null,"source":{"id":"https://openalex.org/S4306400906","display_name":"Research Repository (Delft University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98358874","host_organization_name":"Delft University of Technology","host_organization_lineage":["https://openalex.org/I98358874"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1954264229","display_name":null,"funder_award_id":"number 656.000.003","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W834081922","https://openalex.org/W1164749991","https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W1968962398","https://openalex.org/W2056354534","https://openalex.org/W2100495367","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2146444479","https://openalex.org/W2155968351","https://openalex.org/W2184188583","https://openalex.org/W2417089653","https://openalex.org/W2440926996","https://openalex.org/W2509374375","https://openalex.org/W2551887912","https://openalex.org/W2567015638","https://openalex.org/W2567455162","https://openalex.org/W2570734388","https://openalex.org/W2592538810","https://openalex.org/W2605048551","https://openalex.org/W2619484182","https://openalex.org/W2619543829","https://openalex.org/W2727840223","https://openalex.org/W2746553466","https://openalex.org/W2950872548","https://openalex.org/W2951066214","https://openalex.org/W2962717849","https://openalex.org/W2962747641","https://openalex.org/W2962847657","https://openalex.org/W2963009616","https://openalex.org/W2963211300","https://openalex.org/W2963430173","https://openalex.org/W2963634205","https://openalex.org/W2964061993","https://openalex.org/W2964112890","https://openalex.org/W2964121744","https://openalex.org/W3011120880","https://openalex.org/W4256160650","https://openalex.org/W4294226150","https://openalex.org/W4297795161","https://openalex.org/W4297804343","https://openalex.org/W4396952261","https://openalex.org/W6623316541","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6686207219","https://openalex.org/W6687688904","https://openalex.org/W6716653466","https://openalex.org/W6718190810","https://openalex.org/W6720501231","https://openalex.org/W6729556111","https://openalex.org/W6731293529","https://openalex.org/W6731334075","https://openalex.org/W6736178813","https://openalex.org/W6738700159","https://openalex.org/W6738746681","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2024136090","https://openalex.org/W4206195464"],"abstract_inverted_index":{"Most":[0],"deep":[1,201],"reinforcement":[2,123,136,153,157,202],"learning":[3,43,97,124,134,144,158,167,203],"techniques":[4],"are":[5,117],"unsuitable":[6],"for":[7,130],"robotics,":[8],"as":[9,39,162],"they":[10,116],"require":[11],"too":[12],"much":[13,198],"interaction":[14],"time":[15],"to":[16,28,36,82,102,171,195],"learn":[17,104,191],"useful,":[18],"general":[19,86],"control":[20,44],"policies.":[21],"This":[22],"problem":[23],"can":[24,47,99],"be":[25,37,49,100],"largely":[26],"attributed":[27],"the":[29,60,66,69,72,92,141,148,152,156,174,181,187],"fact":[30],"that":[31,89,193],"a":[32,40,84,106,163],"state":[33,70,90,132,142,149,165,205],"representation":[34,87,96,133,143,150,166,206],"needs":[35],"learned":[38],"part":[41],"of":[42,68,71,88,110],"policies,":[45],"which":[46],"only":[48],"done":[50],"through":[51],"fitting":[52],"expected":[53],"returns":[54],"based":[55],"on":[56,65,80],"observed":[57],"rewards.":[58],"While":[59,108],"reward":[61],"function":[62],"provides":[63],"information":[64,79],"desirability":[67],"world,":[73],"it":[74],"does":[75],"not":[76,119],"necessarily":[77],"provide":[78],"how":[81,186],"distill":[83],"good,":[85],"from":[91],"sensory":[93],"observations.":[94],"State":[95],"objectives":[98,112,145],"used":[101],"help":[103,146,172],"such":[105],"representation.":[107,175],"many":[109],"these":[111,139],"have":[113],"been":[114],"proposed,":[115],"typically":[118],"directly":[120],"combined":[121],"with":[122],"algorithms.":[125],"We":[126],"investigate":[127],"several":[128],"methods":[129,189],"integrating":[131],"into":[135],"learning.":[137,207],"In":[138],"methods,":[140],"regularize":[147],"during":[151],"learning,":[154],"and":[155,169],"itself":[159],"is":[160],"viewed":[161],"crucial":[164],"objective":[168],"allowed":[170],"shape":[173],"Using":[176],"autonomous":[177],"racing":[178],"tests":[179],"in":[180],"TORCS":[182],"simulator,":[183],"we":[184],"show":[185],"integrated":[188],"quickly":[190],"policies":[192],"generalize":[194],"new":[196],"environments":[197],"better":[199],"than":[200],"without":[204]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":23},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":7}],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2018-03-29T00:00:00"}
