{"meta": {"count": 4855, "db_response_time_ms": 301, "page": 1, "per_page": 25, "groups_count": null, "x_query": {"oql": "works where full text has (\n    not simulation\n    and stemmed \"robot control\"\n    and reinforcement learning\n  )", "oqo": {"get_rows": "works", "filter_rows": [{"column_id": "fulltext.search", "value": "simulation", "operator": "has", "is_negated": true}, {"column_id": "fulltext.search", "value": "\"robot control\"", "operator": "has"}, {"column_id": "fulltext.search", "value": "reinforcement learning", "operator": "has"}]}, "url": "/works?filter=fulltext.search:%21simulation,fulltext.search:%22robot control%22,fulltext.search:reinforcement learning"}, "cost_usd": 0.001}, "results": [{"id": "https://openalex.org/W2611243847", "doi": null, "title": "Planning Algorithms", "display_name": "Planning Algorithms", "relevance_score": 755.12225, "publication_year": 2006, "publication_date": "2006-01-01", "ids": {"openalex": "https://openalex.org/W2611243847", "mag": "2611243847"}, "language": "en", "primary_location": {"id": "pmh:oai:CiteSeerX.psu:10.1.1.1.7086", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.7086", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/bookbig.pdf", "raw_type": "text"}, "type": "book", "indexed_in": [], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5065104734", "display_name": "Steven M. LaValle", "orcid": "https://orcid.org/0000-0003-4841-2584"}, "institutions": [], "countries": [], "is_corresponding": true, "raw_author_name": "Steven M. LaValle", "raw_affiliation_strings": [], "raw_orcid": "https://orcid.org/0000-0003-4841-2584", "affiliations": []}], "institutions": [], "countries_distinct_count": 0, "institutions_distinct_count": 0, "corresponding_author_ids": ["https://openalex.org/A5065104734"], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 107.2884, "has_fulltext": false, "cited_by_count": 4200, "citation_normalized_percentile": {"value": 1.0, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 91, "max": 100}, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10906", "display_name": "AI-based Problem Solving and Planning", "score": 0.05660000070929527, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10906", "display_name": "AI-based Problem Solving and Planning", "score": 0.05660000070929527, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5103651881217957}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.44396767020225525}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5103651881217957}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.44396767020225525}], "mesh": [], "locations_count": 6, "locations": [{"id": "pmh:oai:CiteSeerX.psu:10.1.1.1.7086", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.7086", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/bookbig.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.1786", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.1786", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/booka4.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.3806", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.3806", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/book.pdf", "raw_type": "text"}, {"id": "pmh:oai:aleph.bib-bvb.de:BVB01-014980281", "is_oa": false, "landing_page_url": "http://www.loc.gov/catdir/enhancements/fy0642/2006010125-d.html", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "pmh:oai:cds.cern.ch:991583", "is_oa": false, "landing_page_url": "http://cds.cern.ch/record/991583", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402195", "display_name": "CERN Document Server (European Organization for Nuclear Research)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I67311998", "host_organization_name": "European Organization for Nuclear Research", "host_organization_lineage": ["https://openalex.org/I67311998"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, {"id": "mag:2611243847", "is_oa": false, "landing_page_url": "https://dl.acm.org/citation.cfm?id=1213331", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": null, "sustainable_development_goals": [{"display_name": "Sustainable cities and communities", "id": "https://metadata.un.org/sdg/11", "score": 0.41999998688697815}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 0, "referenced_works": [], "related_works": ["https://openalex.org/W2336416123", "https://openalex.org/W2313274380", "https://openalex.org/W2169528473", "https://openalex.org/W2159722616", "https://openalex.org/W2141664020", "https://openalex.org/W2128990851", "https://openalex.org/W2122410182", "https://openalex.org/W2110762409", "https://openalex.org/W2103120971", "https://openalex.org/W2098432798", "https://openalex.org/W2036016432", "https://openalex.org/W2000359213", "https://openalex.org/W1971998222", "https://openalex.org/W1971086298", "https://openalex.org/W1969483458", "https://openalex.org/W1521785144", "https://openalex.org/W1516027685", "https://openalex.org/W1424654272", "https://openalex.org/W131069610", "https://openalex.org/W101508493"], "abstract_inverted_index": {"This": [0], "book": [1], "presents": [2], "a": [3], "unified": [4], "treatment": [5], "of": [6, 10], "many": [7], "different": [8], "kinds": [9], "planning": [11, 38], "algorithms.": [12], "The": [13, 29], "subject": [14], "lies": [15], "at": [16], "the": [17], "crossroads": [18], "between": [19], "robotics,": [20], "control": [21], "theory,": [22, 47], "artificial": [23], "intelligence,": [24], "algorithms,": [25], "and": [26, 58], "computer": [27], "graphics.": [28], "particular": [30], "subjects": [31], "covered": [32], "include": [33], "motion": [34], "planning,": [35, 37, 42, 45, 55, 57], "discrete": [36], "under": [39], "uncertainty,": [40], "sensor-based": [41], "visibility,": [43], "decision-theoretic": [44], "game": [46], "information": [48], "spaces,": [49], "reinforcement": [50], "learning,": [51], "nonlinear": [52], "systems,": [53], "trajectory": [54], "nonholonomic": [56], "kinodynamic": [59], "planning.": [60]}, "counts_by_year": [{"year": 2025, "cited_by_count": 1}, {"year": 2023, "cited_by_count": 5}, {"year": 2022, "cited_by_count": 15}, {"year": 2021, "cited_by_count": 210}, {"year": 2020, "cited_by_count": 281}, {"year": 2019, "cited_by_count": 250}, {"year": 2018, "cited_by_count": 226}, {"year": 2017, "cited_by_count": 212}, {"year": 2016, "cited_by_count": 246}, {"year": 2015, "cited_by_count": 333}, {"year": 2014, "cited_by_count": 306}, {"year": 2013, "cited_by_count": 325}, {"year": 2012, "cited_by_count": 302}], "updated_date": "2026-07-02T09:51:11.867554", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2121517924", "doi": "https://doi.org/10.1613/jair.639", "title": "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition", "display_name": "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition", "relevance_score": 548.0984, "publication_year": 2000, "publication_date": "2000-11-01", "ids": {"openalex": "https://openalex.org/W2121517924", "doi": "https://doi.org/10.1613/jair.639", "mag": "2121517924"}, "language": "en", "primary_location": {"id": "doi:10.1613/jair.639", "is_oa": true, "landing_page_url": "https://doi.org/10.1613/jair.639", "pdf_url": "https://jair.org/index.php/jair/article/download/10266/24463", "source": {"id": "https://openalex.org/S139930977", "display_name": "Journal of Artificial Intelligence Research", "issn_l": "1076-9757", "issn": ["1076-9757", "1943-5037"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310315760", "host_organization_name": "AI Access Foundation", "host_organization_lineage": ["https://openalex.org/P4310315760"], "host_organization_lineage_names": ["AI Access Foundation"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Artificial Intelligence Research", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "diamond", "oa_url": "https://jair.org/index.php/jair/article/download/10266/24463", "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5067135220", "display_name": "Tom Dietterich", "orcid": null}, "institutions": [{"id": "https://openalex.org/I131249849", "display_name": "Oregon State University", "ror": "https://ror.org/00ysfqy60", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I131249849"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "T. G. Dietterich", "raw_affiliation_strings": ["Department of Computer Science, Oregon State University Corvallis, OR 97331", "Dept. of Computer Science, Oregon State University, Corvallis, OR"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, Oregon State University Corvallis, OR 97331", "institution_ids": ["https://openalex.org/I131249849"]}, {"raw_affiliation_string": "Dept. of Computer Science, Oregon State University, Corvallis, OR", "institution_ids": ["https://openalex.org/I131249849"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5067135220"], "corresponding_institution_ids": ["https://openalex.org/I131249849"], "apc_list": null, "apc_paid": null, "fwci": 37.8594, "has_fulltext": true, "cited_by_count": 1452, "citation_normalized_percentile": {"value": 0.99795009, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "13", "issue": null, "first_page": "227", "last_page": "303"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10906", "display_name": "AI-based Problem Solving and Planning", "score": 0.9926000237464905, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9907000064849854, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.8804881572723389}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8063614368438721}, {"id": "https://openalex.org/keywords/markov-decision-process", "display_name": "Markov decision process", "score": 0.7635574340820312}, {"id": "https://openalex.org/keywords/hierarchy", "display_name": "Hierarchy", "score": 0.7333095669746399}, {"id": "https://openalex.org/keywords/bellman-equation", "display_name": "Bellman equation", "score": 0.5801243782043457}, {"id": "https://openalex.org/keywords/programmer", "display_name": "Programmer", "score": 0.5575916767120361}, {"id": "https://openalex.org/keywords/decomposition", "display_name": "Decomposition", "score": 0.5573208928108215}, {"id": "https://openalex.org/keywords/function", "display_name": "Function (biology)", "score": 0.5270436406135559}, {"id": "https://openalex.org/keywords/semantics", "display_name": "Semantics (computer science)", "score": 0.5186090469360352}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4317891597747803}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.36747264862060547}, {"id": "https://openalex.org/keywords/theoretical-computer-science", "display_name": "Theoretical computer science", "score": 0.326183021068573}, {"id": "https://openalex.org/keywords/markov-process", "display_name": "Markov process", "score": 0.26930999755859375}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.22215846180915833}, {"id": "https://openalex.org/keywords/programming-language", "display_name": "Programming language", "score": 0.19403472542762756}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.07082667946815491}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.8804881572723389}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8063614368438721}, {"id": "https://openalex.org/C106189395", "wikidata": "https://www.wikidata.org/wiki/Q176789", "display_name": "Markov decision process", "level": 3, "score": 0.7635574340820312}, {"id": "https://openalex.org/C31170391", "wikidata": "https://www.wikidata.org/wiki/Q188619", "display_name": "Hierarchy", "level": 2, "score": 0.7333095669746399}, {"id": "https://openalex.org/C14646407", "wikidata": "https://www.wikidata.org/wiki/Q1430750", "display_name": "Bellman equation", "level": 2, "score": 0.5801243782043457}, {"id": "https://openalex.org/C2778514511", "wikidata": "https://www.wikidata.org/wiki/Q1374194", "display_name": "Programmer", "level": 2, "score": 0.5575916767120361}, {"id": "https://openalex.org/C124681953", "wikidata": "https://www.wikidata.org/wiki/Q339062", "display_name": "Decomposition", "level": 2, "score": 0.5573208928108215}, {"id": "https://openalex.org/C14036430", "wikidata": "https://www.wikidata.org/wiki/Q3736076", "display_name": "Function (biology)", "level": 2, "score": 0.5270436406135559}, {"id": "https://openalex.org/C184337299", "wikidata": "https://www.wikidata.org/wiki/Q1437428", "display_name": "Semantics (computer science)", "level": 2, "score": 0.5186090469360352}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4317891597747803}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.36747264862060547}, {"id": "https://openalex.org/C80444323", "wikidata": "https://www.wikidata.org/wiki/Q2878974", "display_name": "Theoretical computer science", "level": 1, "score": 0.326183021068573}, {"id": "https://openalex.org/C159886148", "wikidata": "https://www.wikidata.org/wiki/Q176645", "display_name": "Markov process", "level": 2, "score": 0.26930999755859375}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.22215846180915833}, {"id": "https://openalex.org/C199360897", "wikidata": "https://www.wikidata.org/wiki/Q9143", "display_name": "Programming language", "level": 1, "score": 0.19403472542762756}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.07082667946815491}, {"id": "https://openalex.org/C105795698", "wikidata": "https://www.wikidata.org/wiki/Q12483", "display_name": "Statistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C78458016", "wikidata": "https://www.wikidata.org/wiki/Q840400", "display_name": "Evolutionary biology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C34447519", "wikidata": "https://www.wikidata.org/wiki/Q179522", "display_name": "Market economy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C18903297", "wikidata": "https://www.wikidata.org/wiki/Q7150", "display_name": "Ecology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 7, "locations": [{"id": "doi:10.1613/jair.639", "is_oa": true, "landing_page_url": "https://doi.org/10.1613/jair.639", "pdf_url": "https://jair.org/index.php/jair/article/download/10266/24463", "source": {"id": "https://openalex.org/S139930977", "display_name": "Journal of Artificial Intelligence Research", "issn_l": "1076-9757", "issn": ["1076-9757", "1943-5037"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310315760", "host_organization_name": "AI Access Foundation", "host_organization_lineage": ["https://openalex.org/P4310315760"], "host_organization_lineage_names": ["AI Access Foundation"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Artificial Intelligence Research", "raw_type": "journal-article"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.32.8206", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.32.8206", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.cmu.edu/afs/cs/project/jair/pub/volume13/dietterich00a.ps.Z", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.341.6360", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.341.6360", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://arxiv.org/pdf/cs/9905014v1.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.45.791", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.791", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "ftp://ftp.cs.orst.edu/pub/tgd/papers/tr-big-maxq.ps.gz", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.48.9069", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.9069", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "ftp://ftp.cs.orst.edu/pub/tgd/papers/mlj-maxq.ps.gz", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.68.4694", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.68.4694", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.cmu.edu/afs/cs/project/jair/pub/volume13/dietterich00a.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.8.2761", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.8.2761", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.orst.edu/~tgd/publications/tr-maxq.ps.gz", "raw_type": "text"}], "best_oa_location": {"id": "doi:10.1613/jair.639", "is_oa": true, "landing_page_url": "https://doi.org/10.1613/jair.639", "pdf_url": "https://jair.org/index.php/jair/article/download/10266/24463", "source": {"id": "https://openalex.org/S139930977", "display_name": "Journal of Artificial Intelligence Research", "issn_l": "1076-9757", "issn": ["1076-9757", "1943-5037"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310315760", "host_organization_name": "AI Access Foundation", "host_organization_lineage": ["https://openalex.org/P4310315760"], "host_organization_lineage_names": ["AI Access Foundation"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Artificial Intelligence Research", "raw_type": "journal-article"}, "sustainable_development_goals": [{"display_name": "Peace, Justice and strong institutions", "id": "https://metadata.un.org/sdg/16", "score": 0.8100000023841858}], "awards": [{"id": "https://openalex.org/G3399502651", "display_name": "Understanding and Scaling-Up Machine Learning Algorithms", "funder_award_id": "9626584", "funder_id": "https://openalex.org/F4320306076", "funder_display_name": "National Science Foundation"}, {"id": "https://openalex.org/G8876996369", "display_name": null, "funder_award_id": "N00014", "funder_id": "https://openalex.org/F4320337345", "funder_display_name": "Office of Naval Research"}], "funders": [{"id": "https://openalex.org/F4320306076", "display_name": "National Science Foundation", "ror": "https://ror.org/021nxhr62"}, {"id": "https://openalex.org/F4320337345", "display_name": "Office of Naval Research", "ror": "https://ror.org/00rk2pe57"}], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2121517924.pdf", "grobid_xml": "https://content.openalex.org/works/W2121517924.grobid-xml"}, "referenced_works_count": 52, "referenced_works": ["https://openalex.org/W32403112", "https://openalex.org/W50425026", "https://openalex.org/W141456974", "https://openalex.org/W1487586669", "https://openalex.org/W1488730473", "https://openalex.org/W1511612612", "https://openalex.org/W1515851193", "https://openalex.org/W1557517019", "https://openalex.org/W1557798492", "https://openalex.org/W1568042657", "https://openalex.org/W1576452626", "https://openalex.org/W1595483645", "https://openalex.org/W1631187438", "https://openalex.org/W1650504995", "https://openalex.org/W1981627423", "https://openalex.org/W2012036715", "https://openalex.org/W2020149918", "https://openalex.org/W2028145673", "https://openalex.org/W2037210683", "https://openalex.org/W2061504687", "https://openalex.org/W2101217972", "https://openalex.org/W2102000945", "https://openalex.org/W2110415190", "https://openalex.org/W2112386567", "https://openalex.org/W2116502002", "https://openalex.org/W2123956999", "https://openalex.org/W2150339816", "https://openalex.org/W2158548602", "https://openalex.org/W2159080219", "https://openalex.org/W2160371091", "https://openalex.org/W2165131254", "https://openalex.org/W2284276045", "https://openalex.org/W2341171179", "https://openalex.org/W3011120880", "https://openalex.org/W3139377883", "https://openalex.org/W4233061323", "https://openalex.org/W4245266792", "https://openalex.org/W4248851720", "https://openalex.org/W4307347247", "https://openalex.org/W6631026904", "https://openalex.org/W6634528131", "https://openalex.org/W6636774213", "https://openalex.org/W6645670200", "https://openalex.org/W6655395985", "https://openalex.org/W6659638478", "https://openalex.org/W6677037905", "https://openalex.org/W6683443546", "https://openalex.org/W6683821272", "https://openalex.org/W6695925467", "https://openalex.org/W6704298589", "https://openalex.org/W6775686901", "https://openalex.org/W7036529589"], "related_works": ["https://openalex.org/W3096874164", "https://openalex.org/W2976657239", "https://openalex.org/W2937181779", "https://openalex.org/W1985560493", "https://openalex.org/W2357975469", "https://openalex.org/W2145363145", "https://openalex.org/W1626977535", "https://openalex.org/W2341346307", "https://openalex.org/W4225269853", "https://openalex.org/W2386410636"], "abstract_inverted_index": {"This": [0, 174, 184], "paper": [1, 185, 209, 249, 324, 335], "presents": [2, 210], "a": [3, 20, 55, 58, 61, 64, 71, 226, 233, 257, 274, 289, 312, 338, 344], "new": [4], "approach": [5], "to": [6, 124, 155, 225, 273, 303, 315, 340], "hierarchical": [7, 72, 81, 351], "reinforcement": [8, 82, 128, 352], "learning": [9, 83, 214], "based": [10, 93], "on": [11, 80, 94, 193], "decomposing": [12, 26], "the": [13, 27, 31, 39, 43, 50, 67, 95, 98, 115, 118, 137, 147, 164, 171, 178, 182, 187, 202, 239, 242, 251, 292, 316, 326, 334, 347], "target": [14, 32], "Markov": [15], "decision": [16], "process": [17], "(MDP)": [18], "into": [19, 34], "hierarchy": [21, 165], "of": [22, 30, 38, 42, 66, 70, 120, 140, 170, 181, 205, 228, 241, 245, 259, 291, 320, 328, 346], "smaller": [23, 44], "MDPs": [24, 162], "and": [25, 76, 87, 89, 104, 197, 217, 254, 264, 305, 343], "value": [28, 40, 68, 132, 138, 293], "function": [29, 69, 133, 139, 294], "MDP": [33], "an": [35, 211, 296, 307], "additive": [36], "combination": [37], "functions": [41], "MDPs.": [45], "The": [46, 130, 150, 208, 248, 284, 323], "decomposition,": [47, 52], "known": [48, 231], "as": [49, 232], "MAXQ": [51, 74, 131, 188, 252, 287], "has": [53, 295], "both": [54], "procedural": [56], "semantics---as": [57, 63], "subroutine": [59], "hierarchy---and": [60], "declarative": [62], "representation": [65, 253, 290], "policy.": [73], "unifies": [75], "extends": [77], "previous": [78], "work": [79, 342], "by": [84], "Singh,": [85], "Kaelbling,": [86], "Dayan": [88], "Hinton.": [90], "It": [91], "is": [92, 144, 175], "assumption": [96], "that": [97, 107, 122, 143, 160, 219, 267, 286], "programmer": [99, 116], "can": [100, 135, 166], "identify": [101], "useful": [102], "subgoals": [103], "define": [105], "subtasks": [106], "achieve": [108], "these": [109], "subgoals.": [110], "By": [111], "defining": [112], "such": [113], "subgoals,": [114], "constrains": [117], "set": [119], "policies": [121], "need": [123], "be": [125], "considered": [126], "during": [127], "learning.": [129, 283, 353], "decomposition": [134, 151], "represent": [136], "any": [141], "policy": [142, 230, 277, 310, 317, 321], "consistent": [145], "with": [146, 222, 337], "given": [148], "hierarchy.": [149], "also": [152], "creates": [153], "opportunities": [154], "exploit": [156], "state": [157, 172, 206, 246, 270], "abstractions,": [158], "so": [159], "individual": [161], "within": [163], "ignore": [167], "large": [168], "parts": [169], "space.": [173], "important": [176, 297], "for": [177, 201], "practical": [179], "application": [180], "method.": [183], "defines": [186], "hierarchy,": [189], "proves": [190, 218], "formal": [191], "results": [192], "its": [194], "representational": [195], "power,": [196], "establishes": [198], "five": [199, 243], "conditions": [200], "safe": [203], "use": [204], "abstractions.": [207], "online": [212], "model-free": [213], "algorithm,": [215], "MAXQ-Q,": [216], "it": [220, 299, 301], "converges": [221, 272], "probability": [223], "1": [224], "kind": [227], "locally-optimal": [229], "recursively": [234, 275], "optimal": [235, 276], "policy,": [236], "even": [237], "in": [238, 261, 350], "presence": [240], "kinds": [244], "abstraction.": [247], "evaluates": [250], "MAXQ-Q": [255, 268], "through": [256], "series": [258], "experiments": [260], "three": [262], "domains": [263], "shows": [265], "experimentally": [266], "(with": [269], "abstractions)": [271], "much": [278], "faster": [279], "than": [280], "flat": [281], "Q": [282], "fact": [285], "learns": [288], "benefit:": [298], "makes": [300], "possible": [302], "compute": [304], "execute": [306], "improved,": [308], "non-hierarchical": [309, 330], "via": [311], "procedure": [313], "similar": [314], "improvement": [318], "step": [319], "iteration.": [322], "demonstrates": [325], "effectiveness": [327], "this": [329], "execution": [331], "experimentally.": [332], "Finally,": [333], "concludes": [336], "comparison": [339], "related": [341], "discussion": [345], "design": [348], "tradeoffs": [349]}, "counts_by_year": [{"year": 2026, "cited_by_count": 20}, {"year": 2025, "cited_by_count": 58}, {"year": 2024, "cited_by_count": 63}, {"year": 2023, "cited_by_count": 68}, {"year": 2022, "cited_by_count": 78}, {"year": 2021, "cited_by_count": 110}, {"year": 2020, "cited_by_count": 95}, {"year": 2019, "cited_by_count": 98}, {"year": 2018, "cited_by_count": 82}, {"year": 2017, "cited_by_count": 81}, {"year": 2016, "cited_by_count": 52}, {"year": 2015, "cited_by_count": 46}, {"year": 2014, "cited_by_count": 41}, {"year": 2013, "cited_by_count": 43}, {"year": 2012, "cited_by_count": 53}], "updated_date": "2026-07-04T07:58:01.006859", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3156295478", "doi": "https://doi.org/10.1007/s10462-021-09996-w", "title": "Multi-agent deep reinforcement learning: a survey", "display_name": "Multi-agent deep reinforcement learning: a survey", "relevance_score": 437.53613, "publication_year": 2021, "publication_date": "2021-04-15", "ids": {"openalex": "https://openalex.org/W3156295478", "doi": "https://doi.org/10.1007/s10462-021-09996-w", "mag": "3156295478"}, "language": "en", "primary_location": {"id": "doi:10.1007/s10462-021-09996-w", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10462-021-09996-w", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10462-021-09996-w.pdf", "source": {"id": "https://openalex.org/S122814990", "display_name": "Artificial Intelligence Review", "issn_l": "0269-2821", "issn": ["0269-2821", "1573-7462"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence Review", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "hybrid", "oa_url": "https://link.springer.com/content/pdf/10.1007/s10462-021-09996-w.pdf", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5020980906", "display_name": "Sven Gronauer", "orcid": "https://orcid.org/0000-0002-0047-5116"}, "institutions": [{"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["DE"], "is_corresponding": true, "raw_author_name": "Sven Gronauer", "raw_affiliation_strings": ["Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Arcisstr. 21, 80333, Munich, Germany", "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Munich, Germany"], "raw_orcid": "https://orcid.org/0000-0002-0047-5116", "affiliations": [{"raw_affiliation_string": "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Arcisstr. 21, 80333, Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}, {"raw_affiliation_string": "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5074346065", "display_name": "Klaus Diepold", "orcid": "https://orcid.org/0000-0003-0439-7511"}, "institutions": [{"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["DE"], "is_corresponding": true, "raw_author_name": "Klaus Diepold", "raw_affiliation_strings": ["Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Arcisstr. 21, 80333, Munich, Germany", "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Munich, Germany"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Arcisstr. 21, 80333, Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}, {"raw_affiliation_string": "Department of Electrical and Computer Engineering, Technical University of Munich (TUM), Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5020980906", "https://openalex.org/A5074346065"], "corresponding_institution_ids": ["https://openalex.org/I62916508"], "apc_list": {"value": 2490, "currency": "EUR", "value_usd": 3090}, "apc_paid": {"value": 2490, "currency": "EUR", "value_usd": 3090}, "fwci": 60.8864, "has_fulltext": true, "cited_by_count": 830, "citation_normalized_percentile": {"value": 0.99932882, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "55", "issue": "2", "first_page": "895", "last_page": "943"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9700000286102295, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9667999744415283, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8858934640884399}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7122007012367249}, {"id": "https://openalex.org/keywords/reinforcement", "display_name": "Reinforcement", "score": 0.48242148756980896}, {"id": "https://openalex.org/keywords/domain", "display_name": "Domain (mathematical analysis)", "score": 0.4564315378665924}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4517020285129547}, {"id": "https://openalex.org/keywords/data-science", "display_name": "Data science", "score": 0.34246909618377686}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.08391794562339783}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8858934640884399}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7122007012367249}, {"id": "https://openalex.org/C67203356", "wikidata": "https://www.wikidata.org/wiki/Q1321905", "display_name": "Reinforcement", "level": 2, "score": 0.48242148756980896}, {"id": "https://openalex.org/C36503486", "wikidata": "https://www.wikidata.org/wiki/Q11235244", "display_name": "Domain (mathematical analysis)", "level": 2, "score": 0.4564315378665924}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4517020285129547}, {"id": "https://openalex.org/C2522767166", "wikidata": "https://www.wikidata.org/wiki/Q2374463", "display_name": "Data science", "level": 1, "score": 0.34246909618377686}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.08391794562339783}, {"id": "https://openalex.org/C66938386", "wikidata": "https://www.wikidata.org/wiki/Q633538", "display_name": "Structural engineering", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 4, "locations": [{"id": "doi:10.1007/s10462-021-09996-w", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10462-021-09996-w", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10462-021-09996-w.pdf", "source": {"id": "https://openalex.org/S122814990", "display_name": "Artificial Intelligence Review", "issn_l": "0269-2821", "issn": ["0269-2821", "1573-7462"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence Review", "raw_type": "journal-article"}, {"id": "pmh:oai:mediatum.ub.tum.de:node/1618457", "is_oa": true, "landing_page_url": "https://mediatum.ub.tum.de/1618457", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400453", "display_name": "mediaTUM \u2013 the media and publications repository of the Technical University Munich (Technical University Munich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I62916508", "host_organization_name": "Technical University of Munich", "host_organization_lineage": ["https://openalex.org/I62916508"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "article"}, {"id": "pmh:oai:mediatum.ub.tum.de:node/1639959", "is_oa": false, "landing_page_url": "https://mediatum.ub.tum.de/1639959", "pdf_url": null, "source": {"id": "https://openalex.org/S4377196330", "display_name": "mediaTUM  (Technical University of Munich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I62916508", "host_organization_name": "Technical University of Munich", "host_organization_lineage": ["https://openalex.org/I62916508"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "article"}, {"id": "pmh:oai:mediatum.ub.tum.de:node/1734945", "is_oa": false, "landing_page_url": "https://mediatum.ub.tum.de/1734945", "pdf_url": null, "source": {"id": "https://openalex.org/S4377196330", "display_name": "mediaTUM  (Technical University of Munich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I62916508", "host_organization_name": "Technical University of Munich", "host_organization_lineage": ["https://openalex.org/I62916508"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "article"}], "best_oa_location": {"id": "doi:10.1007/s10462-021-09996-w", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10462-021-09996-w", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10462-021-09996-w.pdf", "source": {"id": "https://openalex.org/S122814990", "display_name": "Artificial Intelligence Review", "issn_l": "0269-2821", "issn": ["0269-2821", "1573-7462"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence Review", "raw_type": "journal-article"}, "sustainable_development_goals": [{"display_name": "Partnerships for the goals", "id": "https://metadata.un.org/sdg/17", "score": 0.4000000059604645}], "awards": [], "funders": [{"id": "https://openalex.org/F4320323383", "display_name": "Technische Universit\u00e4t M\u00fcnchen", "ror": "https://ror.org/02kkvpp62"}], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W3156295478.pdf", "grobid_xml": "https://content.openalex.org/works/W3156295478.grobid-xml"}, "referenced_works_count": 237, "referenced_works": ["https://openalex.org/W56117469", "https://openalex.org/W137071854", "https://openalex.org/W156814138", "https://openalex.org/W345119801", "https://openalex.org/W567721252", "https://openalex.org/W1192553058", "https://openalex.org/W1457482454", "https://openalex.org/W1513468570", "https://openalex.org/W1518858799", "https://openalex.org/W1528676759", "https://openalex.org/W1537415400", "https://openalex.org/W1542941925", "https://openalex.org/W1555689267", "https://openalex.org/W1560074431", "https://openalex.org/W1564534945", "https://openalex.org/W1579184372", "https://openalex.org/W1641379095", "https://openalex.org/W1777239053", "https://openalex.org/W1845972764", "https://openalex.org/W1908253641", "https://openalex.org/W1933349210", "https://openalex.org/W1963523946", "https://openalex.org/W1973039793", "https://openalex.org/W1982678075", "https://openalex.org/W2000514530", "https://openalex.org/W2012812921", "https://openalex.org/W2034806191", "https://openalex.org/W2044212084", "https://openalex.org/W2048382412", "https://openalex.org/W2064675550", "https://openalex.org/W2076337359", "https://openalex.org/W2087322096", "https://openalex.org/W2088956500", "https://openalex.org/W2096145798", "https://openalex.org/W2097381042", "https://openalex.org/W2098432798", "https://openalex.org/W2099618002", "https://openalex.org/W2102764452", "https://openalex.org/W2103561211", "https://openalex.org/W2105695492", "https://openalex.org/W2106713015", "https://openalex.org/W2107544712", "https://openalex.org/W2107726111", "https://openalex.org/W2108449787", "https://openalex.org/W2108892923", "https://openalex.org/W2109910161", "https://openalex.org/W2110906765", "https://openalex.org/W2120327309", "https://openalex.org/W2120846115", "https://openalex.org/W2122253967", "https://openalex.org/W2122763142", "https://openalex.org/W2126222949", "https://openalex.org/W2129633982", "https://openalex.org/W2129936995", "https://openalex.org/W2132683224", "https://openalex.org/W2134779831", "https://openalex.org/W2136202932", "https://openalex.org/W2142505627", "https://openalex.org/W2145339207", "https://openalex.org/W2147492008", "https://openalex.org/W2147539438", "https://openalex.org/W2151710526", "https://openalex.org/W2154902490", "https://openalex.org/W2162022118", "https://openalex.org/W2166533447", "https://openalex.org/W2171026506", "https://openalex.org/W2171234133", "https://openalex.org/W2171863654", "https://openalex.org/W2226396244", "https://openalex.org/W2257979135", "https://openalex.org/W2290743114", "https://openalex.org/W2291986326", "https://openalex.org/W2292533394", "https://openalex.org/W2296073425", "https://openalex.org/W2395575420", "https://openalex.org/W2396783599", "https://openalex.org/W2402402867", "https://openalex.org/W2417786368", "https://openalex.org/W2419612459", "https://openalex.org/W2462906003", "https://openalex.org/W2466211196", "https://openalex.org/W2475089067", "https://openalex.org/W2476301537", "https://openalex.org/W2480004914", "https://openalex.org/W2507578125", "https://openalex.org/W2530849036", "https://openalex.org/W2535584654", "https://openalex.org/W2550182557", "https://openalex.org/W2562637642", "https://openalex.org/W2563399268", "https://openalex.org/W2564324149", "https://openalex.org/W2575731723", "https://openalex.org/W2581240229", "https://openalex.org/W2602275733", "https://openalex.org/W2603088459", "https://openalex.org/W2603266952", "https://openalex.org/W2604283518", "https://openalex.org/W2604402446", "https://openalex.org/W2604704722", "https://openalex.org/W2614309179", "https://openalex.org/W2615688110", "https://openalex.org/W2617547828", "https://openalex.org/W2620645529", "https://openalex.org/W2623431351", "https://openalex.org/W2730328371", "https://openalex.org/W2736601468", "https://openalex.org/W2740377041", "https://openalex.org/W2745868649", "https://openalex.org/W2747213132", "https://openalex.org/W2747402019", "https://openalex.org/W2749807327", "https://openalex.org/W2750605955", "https://openalex.org/W2750866482", "https://openalex.org/W2754517384", "https://openalex.org/W2758442112", "https://openalex.org/W2763208138", "https://openalex.org/W2766184602", "https://openalex.org/W2768629321", "https://openalex.org/W2776126823", "https://openalex.org/W2776777826", "https://openalex.org/W2781238083", "https://openalex.org/W2783268317", "https://openalex.org/W2789386227", "https://openalex.org/W2798511001", "https://openalex.org/W2807741983", "https://openalex.org/W2810602713", "https://openalex.org/W2813428123", "https://openalex.org/W2886000153", "https://openalex.org/W2889987506", "https://openalex.org/W2891287243", "https://openalex.org/W2892013712", "https://openalex.org/W2894389820", "https://openalex.org/W2895896816", "https://openalex.org/W2895921264", "https://openalex.org/W2898035736", "https://openalex.org/W2902907165", "https://openalex.org/W2904455790", "https://openalex.org/W2904948755", "https://openalex.org/W2908261578", "https://openalex.org/W2909611215", "https://openalex.org/W2911616846", "https://openalex.org/W2912793366", "https://openalex.org/W2913326990", "https://openalex.org/W2913654295", "https://openalex.org/W2914351253", "https://openalex.org/W2915117209", "https://openalex.org/W2920700222", "https://openalex.org/W2921955147", "https://openalex.org/W2932081708", "https://openalex.org/W2944827102", "https://openalex.org/W2945582565", "https://openalex.org/W2946606218", "https://openalex.org/W2948099544", "https://openalex.org/W2949201811", "https://openalex.org/W2949267040", "https://openalex.org/W2949464762", "https://openalex.org/W2950395160", "https://openalex.org/W2951896791", "https://openalex.org/W2962764167", "https://openalex.org/W2962766710", "https://openalex.org/W2962852262", "https://openalex.org/W2962869735", "https://openalex.org/W2962938168", "https://openalex.org/W2962938178", "https://openalex.org/W2963041255", "https://openalex.org/W2963065757", "https://openalex.org/W2963147362", "https://openalex.org/W2963155490", "https://openalex.org/W2963162637", "https://openalex.org/W2963170138", "https://openalex.org/W2963293542", "https://openalex.org/W2963368198", "https://openalex.org/W2963385935", "https://openalex.org/W2963390684", "https://openalex.org/W2963455109", "https://openalex.org/W2963485523", "https://openalex.org/W2963497400", "https://openalex.org/W2963536577", "https://openalex.org/W2963627051", "https://openalex.org/W2963637944", "https://openalex.org/W2963658727", "https://openalex.org/W2963681240", "https://openalex.org/W2963689090", "https://openalex.org/W2963715810", "https://openalex.org/W2963717208", "https://openalex.org/W2963788414", "https://openalex.org/W2963809389", "https://openalex.org/W2963881016", "https://openalex.org/W2963887494", "https://openalex.org/W2964014087", "https://openalex.org/W2964043796", "https://openalex.org/W2964161785", "https://openalex.org/W2964321324", "https://openalex.org/W2964345382", "https://openalex.org/W2964915587", "https://openalex.org/W2964937097", "https://openalex.org/W2965433979", "https://openalex.org/W2966371608", "https://openalex.org/W2967558504", "https://openalex.org/W2968526727", "https://openalex.org/W2969456553", "https://openalex.org/W2970603867", "https://openalex.org/W2971094937", "https://openalex.org/W2981038142", "https://openalex.org/W2982041656", "https://openalex.org/W2982316857", "https://openalex.org/W2982320652", "https://openalex.org/W2987046690", "https://openalex.org/W2989068617", "https://openalex.org/W2991046523", "https://openalex.org/W2994864869", "https://openalex.org/W2995453501", "https://openalex.org/W2995520132", "https://openalex.org/W2995815314", "https://openalex.org/W2995874959", "https://openalex.org/W2996037775", "https://openalex.org/W2996343955", "https://openalex.org/W3009413787", "https://openalex.org/W3012445938", "https://openalex.org/W3033076904", "https://openalex.org/W3037207827", "https://openalex.org/W3037211759", "https://openalex.org/W3037940279", "https://openalex.org/W3048435052", "https://openalex.org/W3098163860", "https://openalex.org/W3099134564", "https://openalex.org/W3099518626", "https://openalex.org/W3100789280", "https://openalex.org/W3104860527", "https://openalex.org/W3110309042", "https://openalex.org/W3127047144", "https://openalex.org/W3133280296", "https://openalex.org/W4206354906", "https://openalex.org/W4232967792", "https://openalex.org/W4246078117", "https://openalex.org/W4252284432", "https://openalex.org/W4254547512"], "related_works": ["https://openalex.org/W4310083477", "https://openalex.org/W2328553770", "https://openalex.org/W2920061524", "https://openalex.org/W1977959518", "https://openalex.org/W2038908348", "https://openalex.org/W2107890255", "https://openalex.org/W2106552856", "https://openalex.org/W2145821588", "https://openalex.org/W2086122291", "https://openalex.org/W1987513656"], "abstract_inverted_index": {"Abstract": [0], "The": [1], "advances": [2], "in": [3, 10, 51, 118, 132, 163], "reinforcement": [4, 28, 57, 70], "learning": [5, 29, 71], "have": [6], "recorded": [7], "sublime": [8], "success": [9], "various": [11], "domains.": [12], "Although": [13], "the": [14, 34, 48, 52, 79, 83, 86, 97, 112, 133], "multi-agent": [15, 27, 55, 75, 134], "domain": [16, 135], "has": [17], "been": [18], "overshadowed": [19], "by": [20], "its": [21], "single-agent": [22], "counterpart": [23], "during": [24], "this": [25, 149, 164], "progress,": [26], "gains": [30], "rapid": [31], "traction,": [32], "and": [33, 121, 136, 156], "latest": [35], "accomplishments": [36], "address": [37], "problems": [38], "with": [39, 73, 144], "real-world": [40], "complexity.": [41], "This": [42], "article": [43], "provides": [44], "an": [45], "overview": [46], "of": [47, 54, 99, 115], "current": [49], "developments": [50], "field": [53], "deep": [56, 69], "learning.": [58], "We": [59], "focus": [60], "primarily": [61], "on": [62], "literature": [63], "from": [64], "recent": [65], "years": [66], "that": [67, 81, 102, 129, 139], "combines": [68], "methods": [72, 138], "a": [74], "scenario.": [76], "To": [77, 147], "survey": [78], "works": [80], "constitute": [82], "contemporary": [84], "landscape,": [85], "main": [87], "contents": [88], "are": [89, 103, 140], "divided": [90], "into": [91], "three": [92], "parts.": [93], "First,": [94], "we": [95, 110, 125, 151], "analyze": [96], "structure": [98], "training": [100], "schemes": [101], "applied": [104], "to": [105, 142], "train": [106], "multiple": [107], "agents.": [108], "Second,": [109], "consider": [111], "emergent": [113], "patterns": [114], "agent": [116], "behavior": [117], "cooperative,": [119], "competitive": [120], "mixed": [122], "scenarios.": [123], "Third,": [124], "systematically": [126], "enumerate": [127], "challenges": [128], "exclusively": [130], "arise": [131], "review": [137], "leveraged": [141], "cope": [143], "these": [145], "challenges.": [146], "conclude": [148], "survey,": [150], "discuss": [152], "advances,": [153], "identify": [154], "trends,": [155], "outline": [157], "possible": [158], "directions": [159], "for": [160], "future": [161], "work": [162], "research": [165], "area.": [166]}, "counts_by_year": [{"year": 2026, "cited_by_count": 114}, {"year": 2025, "cited_by_count": 269}, {"year": 2024, "cited_by_count": 223}, {"year": 2023, "cited_by_count": 149}, {"year": 2022, "cited_by_count": 60}, {"year": 2021, "cited_by_count": 15}], "updated_date": "2026-07-04T07:58:01.006859", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2963043696", "doi": "https://doi.org/10.1609/aaai.v32i1.11596", "title": "Learning to Generalize: Meta-Learning for Domain Generalization", "display_name": "Learning to Generalize: Meta-Learning for Domain Generalization", "relevance_score": 389.16257, "publication_year": 2018, "publication_date": "2018-04-29", "ids": {"openalex": "https://openalex.org/W2963043696", "doi": "https://doi.org/10.1609/aaai.v32i1.11596", "mag": "2963043696"}, "language": "en", "primary_location": {"id": "doi:10.1609/aaai.v32i1.11596", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11596", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "diamond", "oa_url": "https://doi.org/10.1609/aaai.v32i1.11596", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5100384536", "display_name": "Da Li", "orcid": "https://orcid.org/0000-0002-2101-2989"}, "institutions": [{"id": "https://openalex.org/I166337079", "display_name": "Queen Mary University of London", "ror": "https://ror.org/026zzn846", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I124357947", "https://openalex.org/I166337079"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Da Li", "raw_affiliation_strings": ["Queen Mary University of London"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Queen Mary University of London", "institution_ids": ["https://openalex.org/I166337079"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5101445001", "display_name": "Yongxin Yang", "orcid": "https://orcid.org/0000-0003-4134-8559"}, "institutions": [{"id": "https://openalex.org/I166337079", "display_name": "Queen Mary University of London", "ror": "https://ror.org/026zzn846", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I124357947", "https://openalex.org/I166337079"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Yongxin Yang", "raw_affiliation_strings": ["Queen Mary University of London"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Queen Mary University of London", "institution_ids": ["https://openalex.org/I166337079"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5046046128", "display_name": "Yi-Zhe Song", "orcid": "https://orcid.org/0000-0001-5908-3275"}, "institutions": [{"id": "https://openalex.org/I166337079", "display_name": "Queen Mary University of London", "ror": "https://ror.org/026zzn846", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I124357947", "https://openalex.org/I166337079"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Yi-Zhe Song", "raw_affiliation_strings": ["Queen Mary University of London"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Queen Mary University of London", "institution_ids": ["https://openalex.org/I166337079"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5087823932", "display_name": "Timothy M. Hospedales", "orcid": "https://orcid.org/0000-0003-4867-7486"}, "institutions": [{"id": "https://openalex.org/I98677209", "display_name": "University of Edinburgh", "ror": "https://ror.org/01nrxwf90", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I98677209"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Timothy Hospedales", "raw_affiliation_strings": ["The University of Edinburgh"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "The University of Edinburgh", "institution_ids": ["https://openalex.org/I98677209"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 25.904, "has_fulltext": true, "cited_by_count": 1233, "citation_normalized_percentile": {"value": 0.99408051, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "32", "issue": "1", "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T11307", "display_name": "Domain Adaptation and Few-Shot Learning", "score": 0.9986000061035156, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T11307", "display_name": "Domain Adaptation and Few-Shot Learning", "score": 0.9986000061035156, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11243", "display_name": "Respiratory viral infections research", "score": 0.9817000031471252, "subfield": {"id": "https://openalex.org/subfields/2713", "display_name": "Epidemiology"}, "field": {"id": "https://openalex.org/fields/27", "display_name": "Medicine"}, "domain": {"id": "https://openalex.org/domains/4", "display_name": "Health Sciences"}}, {"id": "https://openalex.org/T11688", "display_name": "Microbial infections and disease research", "score": 0.9032999873161316, "subfield": {"id": "https://openalex.org/subfields/2404", "display_name": "Microbiology"}, "field": {"id": "https://openalex.org/fields/24", "display_name": "Immunology and Microbiology"}, "domain": {"id": "https://openalex.org/domains/1", "display_name": "Life Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7827876806259155}, {"id": "https://openalex.org/keywords/generalization", "display_name": "Generalization", "score": 0.7400909066200256}, {"id": "https://openalex.org/keywords/domain", "display_name": "Domain (mathematical analysis)", "score": 0.6981216669082642}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6641339063644409}, {"id": "https://openalex.org/keywords/benchmark", "display_name": "Benchmark (surveying)", "score": 0.6569912433624268}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5908845663070679}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.4971309006214142}, {"id": "https://openalex.org/keywords/meta-learning", "display_name": "Meta learning (computer science)", "score": 0.42636406421661377}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.1337517499923706}, {"id": "https://openalex.org/keywords/task", "display_name": "Task (project management)", "score": 0.12239444255828857}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7827876806259155}, {"id": "https://openalex.org/C177148314", "wikidata": "https://www.wikidata.org/wiki/Q170084", "display_name": "Generalization", "level": 2, "score": 0.7400909066200256}, {"id": "https://openalex.org/C36503486", "wikidata": "https://www.wikidata.org/wiki/Q11235244", "display_name": "Domain (mathematical analysis)", "level": 2, "score": 0.6981216669082642}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6641339063644409}, {"id": "https://openalex.org/C185798385", "wikidata": "https://www.wikidata.org/wiki/Q1161707", "display_name": "Benchmark (surveying)", "level": 2, "score": 0.6569912433624268}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5908845663070679}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.4971309006214142}, {"id": "https://openalex.org/C2781002164", "wikidata": "https://www.wikidata.org/wiki/Q6822311", "display_name": "Meta learning (computer science)", "level": 3, "score": 0.42636406421661377}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.1337517499923706}, {"id": "https://openalex.org/C2780451532", "wikidata": "https://www.wikidata.org/wiki/Q759676", "display_name": "Task (project management)", "level": 2, "score": 0.12239444255828857}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C187736073", "wikidata": "https://www.wikidata.org/wiki/Q2920921", "display_name": "Management", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C205649164", "wikidata": "https://www.wikidata.org/wiki/Q1071", "display_name": "Geography", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C13280743", "wikidata": "https://www.wikidata.org/wiki/Q131089", "display_name": "Geodesy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1609/aaai.v32i1.11596", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11596", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, {"id": "pmh:oai:ojs.aaai.org:article/11596", "is_oa": true, "landing_page_url": "https://ojs.aaai.org/index.php/AAAI/article/view/11596", "pdf_url": "https://ojs.aaai.org/index.php/AAAI/article/download/11596/11455", "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "2159-5399", "raw_type": "info:eu-repo/semantics/article"}], "best_oa_location": {"id": "doi:10.1609/aaai.v32i1.11596", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11596", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G1410665336", "display_name": "Deferred Restructuring of Experience in Autonomous Machines", "funder_award_id": "640891", "funder_id": "https://openalex.org/F4320320300", "funder_display_name": "European Commission"}, {"id": "https://openalex.org/G2838024195", "display_name": "UK Robotics and Artificial Intelligence Hub for Offshore Energy Asset Integrity Management", "funder_award_id": "EP/R026173/1", "funder_id": "https://openalex.org/F4320334627", "funder_display_name": "Engineering and Physical Sciences Research Council"}, {"id": "https://openalex.org/G8602921712", "display_name": null, "funder_award_id": "EP/R026173", "funder_id": "https://openalex.org/F4320334627", "funder_display_name": "Engineering and Physical Sciences Research Council"}], "funders": [{"id": "https://openalex.org/F4320320300", "display_name": "European Commission", "ror": "https://ror.org/00k4n6c32"}, {"id": "https://openalex.org/F4320334627", "display_name": "Engineering and Physical Sciences Research Council", "ror": "https://ror.org/0439y7842"}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 46, "referenced_works": ["https://openalex.org/W96659543", "https://openalex.org/W1703030490", "https://openalex.org/W1848094219", "https://openalex.org/W1852255964", "https://openalex.org/W1882958252", "https://openalex.org/W1920962657", "https://openalex.org/W1929309940", "https://openalex.org/W1982696459", "https://openalex.org/W2097381042", "https://openalex.org/W2106008664", "https://openalex.org/W2119717200", "https://openalex.org/W2145339207", "https://openalex.org/W2155858138", "https://openalex.org/W2163605009", "https://openalex.org/W2167328503", "https://openalex.org/W2174786457", "https://openalex.org/W2427497464", "https://openalex.org/W2558634851", "https://openalex.org/W2562986790", "https://openalex.org/W2581955877", "https://openalex.org/W2604763608", "https://openalex.org/W2616957565", "https://openalex.org/W2726717203", "https://openalex.org/W2741134157", "https://openalex.org/W2753160622", "https://openalex.org/W2756202949", "https://openalex.org/W2763549966", "https://openalex.org/W2953127297", "https://openalex.org/W2963078860", "https://openalex.org/W2963211188", "https://openalex.org/W2963341924", "https://openalex.org/W2963775850", "https://openalex.org/W3146885639", "https://openalex.org/W4230563027", "https://openalex.org/W4298174377", "https://openalex.org/W6601295022", "https://openalex.org/W6639105354", "https://openalex.org/W6645905228", "https://openalex.org/W6684696554", "https://openalex.org/W6717697761", "https://openalex.org/W6725448924", "https://openalex.org/W6743661861", "https://openalex.org/W6745111092", "https://openalex.org/W6780559895", "https://openalex.org/W6793289451", "https://openalex.org/W7075710188"], "related_works": ["https://openalex.org/W3147214434", "https://openalex.org/W2994073215", "https://openalex.org/W3034946435", "https://openalex.org/W3196841879", "https://openalex.org/W2947217676", "https://openalex.org/W3200361725", "https://openalex.org/W2979869797", "https://openalex.org/W4312903001", "https://openalex.org/W3204541964", "https://openalex.org/W3204328054"], "abstract_inverted_index": {"Domain": [0, 26], "shift": [1, 67, 88], "refers": [2], "to": [3, 31, 43, 65, 105, 125], "the": [4, 136], "well": [5, 42, 147], "known": [6], "problem": [7], "that": [8, 62, 103], "a": [9, 20, 49, 59, 76, 140], "model": [10, 61, 77], "trained": [11], "in": [12, 69], "one": [13], "source": [14], "domain": [15, 22, 54, 66, 87, 108, 114], "performs": [16], "poorly": [17], "when": [18], "appliedto": [19], "target": [21], "with": [23, 121], "different": [24], "statistics.": [25], "Generalization": [27], "(DG)": [28], "techniques": [29], "attempt": [30], "alleviate": [32], "this": [33], "issue": [34], "by": [35, 39, 91], "producing": [36], "models": [37, 120], "which": [38], "design": [40], "generalize": [41], "novel": [44, 50, 126], "testing": [45, 94, 113], "domains.": [46, 127], "We": [47, 128], "propose": [48, 75], "meta-learning": [51, 117], "method": [52, 131], "for": [53, 81], "generalization.": [55], "Rather": [56], "than": [57], "designing": [58], "specific": [60], "is": [63], "robust": [64], "as": [68, 146], "most": [70], "previous": [71], "DG": [72], "work,": [73], "we": [74], "agnostic": [78], "training": [79, 90, 107], "procedure": [80, 118], "DG.": [82], "Our": [83], "algorithm": [84], "simulates": [85], "train/test": [86], "during": [89], "synthesizing": [92], "virtual": [93], "domains": [95], "within": [96], "each": [97], "mini-batch.": [98], "The": [99], "meta-optimization": [100], "objective": [101], "requires": [102], "steps": [104], "improve": [106, 112], "performance": [109], "should": [110], "also": [111], "performance.": [115], "This": [116], "trains": [119], "good": [122], "generalization": [123], "ability": [124], "evaluate": [129], "our": [130], "and": [132], "achieve": [133], "state": [134], "of": [135], "art": [137], "results": [138], "on": [139, 151], "recent": [141], "cross-domain": [142], "image": [143], "classification": [144], "benchmark,": [145], "demonstrating": [148], "its": [149], "potential": [150], "two": [152], "classic": [153], "reinforcement": [154], "learning": [155], "tasks.": [156]}, "counts_by_year": [{"year": 2026, "cited_by_count": 50}, {"year": 2025, "cited_by_count": 215}, {"year": 2024, "cited_by_count": 232}, {"year": 2023, "cited_by_count": 246}, {"year": 2022, "cited_by_count": 157}, {"year": 2021, "cited_by_count": 191}, {"year": 2020, "cited_by_count": 92}, {"year": 2019, "cited_by_count": 42}, {"year": 2018, "cited_by_count": 7}, {"year": 2017, "cited_by_count": 1}], "updated_date": "2026-07-03T08:13:44.112507", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2902125520", "doi": "https://doi.org/10.48550/arxiv.1812.00568", "title": "Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control", "display_name": "Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control", "relevance_score": 376.63608, "publication_year": 2018, "publication_date": "2018-12-03", "ids": {"openalex": "https://openalex.org/W2902125520", "doi": "https://doi.org/10.48550/arxiv.1812.00568", "mag": "2902125520"}, "language": "en", "primary_location": {"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, "type": "preprint", "indexed_in": ["arxiv", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1812.00568", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5029017885", "display_name": "Frederik Ebert", "orcid": null}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Ebert, Frederik", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5005431772", "display_name": "Chelsea Finn", "orcid": "https://orcid.org/0000-0001-6298-0874"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Finn, Chelsea", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5069471785", "display_name": "Sudeep Dasari", "orcid": "https://orcid.org/0000-0003-2600-2779"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Dasari, Sudeep", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5082350181", "display_name": "Annie Xie", "orcid": "https://orcid.org/0000-0003-1736-3775"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Xie, Annie", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5027666200", "display_name": "Alex X. Lee", "orcid": null}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Lee, Alex", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Levine, Sergey", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}], "institutions": [], "countries_distinct_count": 0, "institutions_distinct_count": 0, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": false, "cited_by_count": 265, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9991000294685364, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10502", "display_name": "Advanced Memory and Neural Computing", "score": 0.9891999959945679, "subfield": {"id": "https://openalex.org/subfields/2208", "display_name": "Electrical and Electronic Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.8180503845214844}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7615313529968262}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7216188907623291}, {"id": "https://openalex.org/keywords/generalization", "display_name": "Generalization", "score": 0.62183678150177}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.5961755514144897}, {"id": "https://openalex.org/keywords/pixel", "display_name": "Pixel", "score": 0.5803840756416321}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5160279273986816}, {"id": "https://openalex.org/keywords/object", "display_name": "Object (grammar)", "score": 0.5149974822998047}, {"id": "https://openalex.org/keywords/computer-vision", "display_name": "Computer vision", "score": 0.507150411605835}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.47004857659339905}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.3961804211139679}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.10632431507110596}], "concepts": [{"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.8180503845214844}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7615313529968262}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7216188907623291}, {"id": "https://openalex.org/C177148314", "wikidata": "https://www.wikidata.org/wiki/Q170084", "display_name": "Generalization", "level": 2, "score": 0.62183678150177}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.5961755514144897}, {"id": "https://openalex.org/C160633673", "wikidata": "https://www.wikidata.org/wiki/Q355198", "display_name": "Pixel", "level": 2, "score": 0.5803840756416321}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5160279273986816}, {"id": "https://openalex.org/C2781238097", "wikidata": "https://www.wikidata.org/wiki/Q175026", "display_name": "Object (grammar)", "level": 2, "score": 0.5149974822998047}, {"id": "https://openalex.org/C31972630", "wikidata": "https://www.wikidata.org/wiki/Q844240", "display_name": "Computer vision", "level": 1, "score": 0.507150411605835}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.47004857659339905}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.3961804211139679}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.10632431507110596}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, {"id": "doi:10.48550/arxiv.1812.00568", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1812.00568", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "Preprint"}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, "sustainable_development_goals": [{"display_name": "Quality Education", "id": "https://metadata.un.org/sdg/4", "score": 0.7200000286102295}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 42, "referenced_works": ["https://openalex.org/W1522301498", "https://openalex.org/W1757796397", "https://openalex.org/W1977687214", "https://openalex.org/W1984383922", "https://openalex.org/W2012587148", "https://openalex.org/W2131600418", "https://openalex.org/W2140135625", "https://openalex.org/W2158782408", "https://openalex.org/W2167089254", "https://openalex.org/W2201912979", "https://openalex.org/W2281096776", "https://openalex.org/W2338684808", "https://openalex.org/W2400532028", "https://openalex.org/W2410156224", "https://openalex.org/W2473208550", "https://openalex.org/W2502312327", "https://openalex.org/W2528489519", "https://openalex.org/W2586067474", "https://openalex.org/W2592285981", "https://openalex.org/W2594961016", "https://openalex.org/W2606047872", "https://openalex.org/W2742605348", "https://openalex.org/W2765349170", "https://openalex.org/W2765994785", "https://openalex.org/W2770257943", "https://openalex.org/W2772709170", "https://openalex.org/W2796303840", "https://openalex.org/W2892490014", "https://openalex.org/W2949099979", "https://openalex.org/W2949962649", "https://openalex.org/W2951553872", "https://openalex.org/W2951748364", "https://openalex.org/W2951775809", "https://openalex.org/W2952578114", "https://openalex.org/W2953118818", "https://openalex.org/W2962736495", "https://openalex.org/W2962793652", "https://openalex.org/W2963125871", "https://openalex.org/W2963430173", "https://openalex.org/W2963435596", "https://openalex.org/W2963629403", "https://openalex.org/W2963634205"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W3162204513", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W2031695474", "https://openalex.org/W2024136090", "https://openalex.org/W2586732548", "https://openalex.org/W3049728571", "https://openalex.org/W2964765435"], "abstract_inverted_index": {"Deep": [0], "reinforcement": [1], "learning": [2, 29], "(RL)": [3], "algorithms": [4], "can": [5, 175], "learn": [6], "complex": [7], "robotic": [8, 49], "skills": [9], "from": [10, 87], "raw": [11, 88], "sensory": [12, 89], "inputs,": [13], "but": [14], "have": [15], "yet": [16], "to": [17, 54, 82, 177], "achieve": [18], "the": [19, 85, 129, 192], "kind": [20], "of": [21, 144, 165, 186], "broad": [22], "generalization": [23], "and": [24, 51, 57, 69, 122, 138, 157, 181], "applicability": [25], "demonstrated": [26], "by": [27, 115, 159], "deep": [28, 37, 148], "methods": [30], "in": [31, 119], "supervised": [32], "domains.": [33], "We": [34, 170], "present": [35], "a": [36, 73, 78, 108, 160, 184], "RL": [38], "method": [39], "that": [40, 172], "is": [41, 133], "practical": [42], "for": [43], "real-world": [44], "robotics": [45], "tasks,": [46], "such": [47, 91], "as": [48, 92], "manipulation,": [50], "generalizes": [52], "effectively": [53], "never-before-seen": [55, 178], "tasks": [56, 114, 190], "objects.": [58], "In": [59], "these": [60], "settings,": [61], "ground": [62], "truth": [63], "reward": [64], "signals": [65], "are": [66, 151], "typically": [67], "unavailable,": [68], "we": [70, 98], "therefore": [71], "propose": [72], "self-supervised": [74], "model-based": [75], "approach,": [76], "where": [77, 107, 128], "predictive": [79, 149], "model": [80], "learns": [81], "directly": [83], "predict": [84], "future": [86], "readings,": [90], "camera": [93], "images.": [94], "At": [95], "test": [96], "time,": [97], "explore": [99], "three": [100], "distinct": [101], "goal": [102, 124, 126, 131, 145], "specification": [103], "methods:": [104], "designated": [105], "pixels,": [106], "user": [109], "specifies": [110], "desired": [111, 130], "object": [112, 188], "manipulation": [113, 189], "selecting": [116], "particular": [117], "pixels": [118], "an": [120, 136], "image": [121, 139], "corresponding": [123], "positions,": [125], "images,": [127], "state": [132], "specified": [134], "with": [135, 163], "image,": [137], "classifiers,": [140], "which": [141], "define": [142], "spaces": [143], "states.": [146], "Our": [147], "models": [150], "trained": [152], "using": [153, 191], "data": [154], "collected": [155], "autonomously": [156], "continuously": [158], "robot": [161], "interacting": [162], "hundreds": [164], "objects,": [166], "without": [167], "human": [168], "supervision.": [169], "demonstrate": [171], "visual": [173], "MPC": [174], "generalize": [176], "objects---both": [179], "rigid": [180], "deformable---and": [182], "solve": [183], "range": [185], "user-defined": [187], "same": [193], "model.": [194]}, "counts_by_year": [{"year": 2026, "cited_by_count": 2}, {"year": 2025, "cited_by_count": 5}, {"year": 2024, "cited_by_count": 22}, {"year": 2023, "cited_by_count": 39}, {"year": 2022, "cited_by_count": 26}, {"year": 2021, "cited_by_count": 74}, {"year": 2020, "cited_by_count": 67}, {"year": 2019, "cited_by_count": 29}, {"year": 2018, "cited_by_count": 1}], "updated_date": "2026-07-01T06:00:48.157686", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3093426589", "doi": "https://doi.org/10.1631/fitee.1900533", "title": "Deep reinforcement learning: a survey", "display_name": "Deep reinforcement learning: a survey", "relevance_score": 368.9691, "publication_year": 2020, "publication_date": "2020-10-15", "ids": {"openalex": "https://openalex.org/W3093426589", "doi": "https://doi.org/10.1631/fitee.1900533", "mag": "3093426589"}, "language": "en", "primary_location": {"id": "doi:10.1631/fitee.1900533", "is_oa": false, "landing_page_url": "https://doi.org/10.1631/fitee.1900533", "pdf_url": null, "source": {"id": "https://openalex.org/S4210189857", "display_name": "Frontiers of Information Technology & Electronic Engineering", "issn_l": "2095-9184", "issn": ["2095-9184", "2095-9230"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Frontiers of Information Technology &amp; Electronic Engineering", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": null, "display_name": "Hao-nan Wang", "orcid": "https://orcid.org/0000-0002-0792-3858"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Hao-nan Wang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": "https://orcid.org/0000-0002-0792-3858", "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5101683584", "display_name": "Ning Liu", "orcid": "https://orcid.org/0000-0001-7475-9739"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Ning Liu", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5040322824", "display_name": "Yiyun Zhang", "orcid": "https://orcid.org/0000-0002-1419-485X"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yi-yun Zhang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5039795290", "display_name": "Dawei Feng", "orcid": "https://orcid.org/0000-0002-7587-8905"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Da-wei Feng", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5051363049", "display_name": "Feng Huang", "orcid": "https://orcid.org/0000-0002-0740-9373"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Feng Huang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5100681030", "display_name": "Dongsheng Li", "orcid": "https://orcid.org/0000-0001-7725-8040"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Dong-sheng Li", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5100395388", "display_name": "Yiming Zhang", "orcid": "https://orcid.org/0000-0003-3857-8433"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yi-ming Zhang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": [], "corresponding_institution_ids": ["https://openalex.org/I170215575"], "apc_list": null, "apc_paid": null, "fwci": 13.2267, "has_fulltext": false, "cited_by_count": 278, "citation_normalized_percentile": {"value": 0.99026506, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "21", "issue": "12", "first_page": "1726", "last_page": "1744"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9897000193595886, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12784", "display_name": "Modular Robots and Swarm Intelligence", "score": 0.9832000136375427, "subfield": {"id": "https://openalex.org/subfields/2210", "display_name": "Mechanical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8006557822227478}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7813801765441895}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6882914900779724}, {"id": "https://openalex.org/keywords/categorization", "display_name": "Categorization", "score": 0.5173744559288025}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.4922133982181549}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.39875051379203796}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8006557822227478}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7813801765441895}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6882914900779724}, {"id": "https://openalex.org/C94124525", "wikidata": "https://www.wikidata.org/wiki/Q912550", "display_name": "Categorization", "level": 2, "score": 0.5173744559288025}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.4922133982181549}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.39875051379203796}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1631/fitee.1900533", "is_oa": false, "landing_page_url": "https://doi.org/10.1631/fitee.1900533", "pdf_url": null, "source": {"id": "https://openalex.org/S4210189857", "display_name": "Frontiers of Information Technology & Electronic Engineering", "issn_l": "2095-9184", "issn": ["2095-9184", "2095-9230"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Frontiers of Information Technology &amp; Electronic Engineering", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 154, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W1575592356", "https://openalex.org/W1757796397", "https://openalex.org/W1968962398", "https://openalex.org/W1988526405", "https://openalex.org/W1999874108", "https://openalex.org/W2021247827", "https://openalex.org/W2061562262", "https://openalex.org/W2098774185", "https://openalex.org/W2100401322", "https://openalex.org/W2100677568", "https://openalex.org/W2104733512", "https://openalex.org/W2106164082", "https://openalex.org/W2108738385", "https://openalex.org/W2119717200", "https://openalex.org/W2121863487", "https://openalex.org/W2124352385", "https://openalex.org/W2126909264", "https://openalex.org/W2141559645", "https://openalex.org/W2145339207", "https://openalex.org/W2150468603", "https://openalex.org/W2155968351", "https://openalex.org/W2163605009", "https://openalex.org/W2165150801", "https://openalex.org/W2169498096", "https://openalex.org/W2173564293", "https://openalex.org/W2257979135", "https://openalex.org/W2280163991", "https://openalex.org/W2290104316", "https://openalex.org/W2417786368", "https://openalex.org/W2419612459", "https://openalex.org/W2434014514", "https://openalex.org/W2466175722", "https://openalex.org/W2523013761", "https://openalex.org/W2528489519", "https://openalex.org/W2529477964", "https://openalex.org/W2529658650", "https://openalex.org/W2546571074", "https://openalex.org/W2550182557", "https://openalex.org/W2556958149", "https://openalex.org/W2561776174", "https://openalex.org/W2575705757", "https://openalex.org/W2593044849", "https://openalex.org/W2596982695", "https://openalex.org/W2604373826", "https://openalex.org/W2604763608", "https://openalex.org/W2614839826", "https://openalex.org/W2616964725", "https://openalex.org/W2726187156", "https://openalex.org/W2727576081", "https://openalex.org/W2736601468", "https://openalex.org/W2738669288", "https://openalex.org/W2739473244", "https://openalex.org/W2746553466", "https://openalex.org/W2749604329", "https://openalex.org/W2749807327", "https://openalex.org/W2755546070", "https://openalex.org/W2761873684", "https://openalex.org/W2765349170", "https://openalex.org/W2766610320", "https://openalex.org/W2766812927", "https://openalex.org/W2767050701", "https://openalex.org/W2781726626", "https://openalex.org/W2785397462", "https://openalex.org/W2786928559", "https://openalex.org/W2787501667", "https://openalex.org/W2787938642", "https://openalex.org/W2788357188", "https://openalex.org/W2788904251", "https://openalex.org/W2789824229", "https://openalex.org/W2795109282", "https://openalex.org/W2796290181", "https://openalex.org/W2805560727", "https://openalex.org/W2805762288", "https://openalex.org/W2810754397", "https://openalex.org/W2810785043", "https://openalex.org/W2824027552", "https://openalex.org/W2885550588", "https://openalex.org/W2888541716", "https://openalex.org/W2889347284", "https://openalex.org/W2892620417", "https://openalex.org/W2895531857", "https://openalex.org/W2903630557", "https://openalex.org/W2912757816", "https://openalex.org/W2914752403", "https://openalex.org/W2923504512", "https://openalex.org/W2938321354", "https://openalex.org/W2949561945", "https://openalex.org/W2949608212", "https://openalex.org/W2950359962", "https://openalex.org/W2950471160", "https://openalex.org/W2950492145", "https://openalex.org/W2950794298", "https://openalex.org/W2951266961", "https://openalex.org/W2951775809", "https://openalex.org/W2951948137", "https://openalex.org/W2952787800", "https://openalex.org/W2954058884", "https://openalex.org/W2962715211", "https://openalex.org/W2962858248", "https://openalex.org/W2962872206", "https://openalex.org/W2962938178", "https://openalex.org/W2963068985", "https://openalex.org/W2963184621", "https://openalex.org/W2963190967", "https://openalex.org/W2963280855", "https://openalex.org/W2963313316", "https://openalex.org/W2963423916", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2963523627", "https://openalex.org/W2963614114", "https://openalex.org/W2963634205", "https://openalex.org/W2963703448", "https://openalex.org/W2963864421", "https://openalex.org/W2963993537", "https://openalex.org/W2964043796", "https://openalex.org/W2964161785", "https://openalex.org/W2964174623", "https://openalex.org/W2964262254", "https://openalex.org/W2964309167", "https://openalex.org/W2968986602", "https://openalex.org/W2971218263", "https://openalex.org/W3037590790", "https://openalex.org/W3101442004", "https://openalex.org/W3103559770", "https://openalex.org/W3103780890", "https://openalex.org/W3104515094", "https://openalex.org/W4241811150", "https://openalex.org/W6600002382", "https://openalex.org/W6600137863", "https://openalex.org/W6600168703", "https://openalex.org/W6600446476", "https://openalex.org/W6600553734", "https://openalex.org/W6601211009", "https://openalex.org/W6601870611", "https://openalex.org/W6602613565", "https://openalex.org/W6602704705", "https://openalex.org/W6603727575", "https://openalex.org/W6603732165", "https://openalex.org/W6605730092", "https://openalex.org/W6606825362", "https://openalex.org/W6606882031", "https://openalex.org/W6608490358", "https://openalex.org/W6633651462", "https://openalex.org/W6675999342", "https://openalex.org/W6676077707", "https://openalex.org/W6718092244", "https://openalex.org/W6743806954", "https://openalex.org/W6746203390", "https://openalex.org/W6772334619", "https://openalex.org/W6811851346", "https://openalex.org/W6814003322", "https://openalex.org/W6846889679"], "related_works": ["https://openalex.org/W2731899572", "https://openalex.org/W2961085424", "https://openalex.org/W3215138031", "https://openalex.org/W4306674287", "https://openalex.org/W3009238340", "https://openalex.org/W2939353110", "https://openalex.org/W4321369474", "https://openalex.org/W4360585206", "https://openalex.org/W4285208911", "https://openalex.org/W3046775127"], "abstract_inverted_index": {"Deep": [0], "reinforcement": [1], "learning": [2], "(RL)": [3], "has": [4, 17], "become": [5], "one": [6], "of": [7], "the": [8, 43, 74, 86], "most": [9], "popular": [10], "topics": [11], "in": [12, 21], "artificial": [13], "intelligence": [14], "research.": [15, 97], "It": [16], "been": [18], "widely": [19], "used": [20], "various": [22], "fields,": [23], "such": [24], "as": [25], "end-to-end": [26], "control,": [27, 29], "robotic": [28], "recommendation": [30], "systems,": [31], "and": [32, 47, 49, 67, 80, 90], "natural": [33], "language": [34], "dialogue": [35], "systems.": [36], "In": [37], "this": [38], "survey,": [39], "we": [40, 84], "systematically": [41], "categorize": [42], "deep": [44, 56], "RL": [45, 57, 69], "algorithms": [46, 58], "applications,": [48, 89], "provide": [50], "a": [51], "detailed": [52], "review": [53], "over": [54], "existing": [55], "by": [59], "dividing": [60], "them": [61], "into": [62], "modelbased": [63], "methods,": [64, 66], "model-free": [65], "advanced": [68], "methods.": [70], "We": [71], "thoroughly": [72], "analyze": [73, 91], "advances": [75], "including": [76], "exploration,": [77], "inverse": [78], "RL,": [79], "transfer": [81], "RL.": [82], "Finally,": [83], "outline": [85], "current": [87], "representative": [88], "four": [92], "open": [93], "problems": [94], "for": [95], "future": [96]}, "counts_by_year": [{"year": 2026, "cited_by_count": 12}, {"year": 2025, "cited_by_count": 61}, {"year": 2024, "cited_by_count": 106}, {"year": 2023, "cited_by_count": 47}, {"year": 2022, "cited_by_count": 35}, {"year": 2021, "cited_by_count": 16}, {"year": 2019, "cited_by_count": 1}], "updated_date": "2026-06-30T13:55:48.251075", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2522489477", "doi": "https://doi.org/10.1609/aaai.v31i1.10827", "title": "Playing FPS Games with Deep Reinforcement Learning", "display_name": "Playing FPS Games with Deep Reinforcement Learning", "relevance_score": 329.57468, "publication_year": 2017, "publication_date": "2017-02-13", "ids": {"openalex": "https://openalex.org/W2522489477", "doi": "https://doi.org/10.1609/aaai.v31i1.10827", "mag": "2522489477"}, "language": "en", "primary_location": {"id": "doi:10.1609/aaai.v31i1.10827", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v31i1.10827", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "diamond", "oa_url": "https://doi.org/10.1609/aaai.v31i1.10827", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5054371148", "display_name": "Guillaume Lample", "orcid": null}, "institutions": [{"id": "https://openalex.org/I74973139", "display_name": "Carnegie Mellon University", "ror": "https://ror.org/05x2bcf33", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I74973139"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Guillaume Lample", "raw_affiliation_strings": ["Carnegie Mellon University"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Carnegie Mellon University", "institution_ids": ["https://openalex.org/I74973139"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5016223957", "display_name": "Devendra Singh Chaplot", "orcid": null}, "institutions": [{"id": "https://openalex.org/I74973139", "display_name": "Carnegie Mellon University", "ror": "https://ror.org/05x2bcf33", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I74973139"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Devendra Singh Chaplot", "raw_affiliation_strings": ["Carnegie Mellon University"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Carnegie Mellon University", "institution_ids": ["https://openalex.org/I74973139"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": [], "corresponding_institution_ids": ["https://openalex.org/I74973139"], "apc_list": null, "apc_paid": null, "fwci": 22.956, "has_fulltext": true, "cited_by_count": 450, "citation_normalized_percentile": {"value": 0.99516908, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "31", "issue": "1", "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11574", "display_name": "Artificial Intelligence in Games", "score": 0.9990000128746033, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10812", "display_name": "Human Pose and Action Recognition", "score": 0.9853000044822693, "subfield": {"id": "https://openalex.org/subfields/1707", "display_name": "Computer Vision and Pattern Recognition"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8969074487686157}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.8240053057670593}, {"id": "https://openalex.org/keywords/exploit", "display_name": "Exploit", "score": 0.7746844291687012}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6925550699234009}, {"id": "https://openalex.org/keywords/architecture", "display_name": "Architecture", "score": 0.6887726783752441}, {"id": "https://openalex.org/keywords/feature", "display_name": "Feature (linguistics)", "score": 0.5553386807441711}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.47802743315696716}, {"id": "https://openalex.org/keywords/video-game", "display_name": "Video game", "score": 0.4256015121936798}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4125569760799408}, {"id": "https://openalex.org/keywords/human\u2013computer-interaction", "display_name": "Human\u2013computer interaction", "score": 0.33617228269577026}, {"id": "https://openalex.org/keywords/multimedia", "display_name": "Multimedia", "score": 0.151155024766922}, {"id": "https://openalex.org/keywords/computer-security", "display_name": "Computer security", "score": 0.07986414432525635}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8969074487686157}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.8240053057670593}, {"id": "https://openalex.org/C165696696", "wikidata": "https://www.wikidata.org/wiki/Q11287", "display_name": "Exploit", "level": 2, "score": 0.7746844291687012}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6925550699234009}, {"id": "https://openalex.org/C123657996", "wikidata": "https://www.wikidata.org/wiki/Q12271", "display_name": "Architecture", "level": 2, "score": 0.6887726783752441}, {"id": "https://openalex.org/C2776401178", "wikidata": "https://www.wikidata.org/wiki/Q12050496", "display_name": "Feature (linguistics)", "level": 2, "score": 0.5553386807441711}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.47802743315696716}, {"id": "https://openalex.org/C3018412434", "wikidata": "https://www.wikidata.org/wiki/Q7889", "display_name": "Video game", "level": 2, "score": 0.4256015121936798}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4125569760799408}, {"id": "https://openalex.org/C107457646", "wikidata": "https://www.wikidata.org/wiki/Q207434", "display_name": "Human\u2013computer interaction", "level": 1, "score": 0.33617228269577026}, {"id": "https://openalex.org/C49774154", "wikidata": "https://www.wikidata.org/wiki/Q131765", "display_name": "Multimedia", "level": 1, "score": 0.151155024766922}, {"id": "https://openalex.org/C38652104", "wikidata": "https://www.wikidata.org/wiki/Q3510521", "display_name": "Computer security", "level": 1, "score": 0.07986414432525635}, {"id": "https://openalex.org/C142362112", "wikidata": "https://www.wikidata.org/wiki/Q735", "display_name": "Art", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C153349607", "wikidata": "https://www.wikidata.org/wiki/Q36649", "display_name": "Visual arts", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C41895202", "wikidata": "https://www.wikidata.org/wiki/Q8162", "display_name": "Linguistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1609/aaai.v31i1.10827", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v31i1.10827", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, {"id": "pmh:oai:ojs.aaai.org:article/10827", "is_oa": true, "landing_page_url": "https://ojs.aaai.org/index.php/AAAI/article/view/10827", "pdf_url": "https://ojs.aaai.org/index.php/AAAI/article/download/10827/10686", "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "2159-5399", "raw_type": "info:eu-repo/semantics/article"}], "best_oa_location": {"id": "doi:10.1609/aaai.v31i1.10827", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v31i1.10827", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320310207", "display_name": "Carnegie Mellon University", "ror": "https://ror.org/05x2bcf33"}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 31, "referenced_works": ["https://openalex.org/W142858861", "https://openalex.org/W1484210532", "https://openalex.org/W1566770693", "https://openalex.org/W1595483645", "https://openalex.org/W1757796397", "https://openalex.org/W1906772730", "https://openalex.org/W2038794597", "https://openalex.org/W2121092017", "https://openalex.org/W2155968351", "https://openalex.org/W2160279936", "https://openalex.org/W2173564293", "https://openalex.org/W2201581102", "https://openalex.org/W2255045308", "https://openalex.org/W2257979135", "https://openalex.org/W2362143032", "https://openalex.org/W2399570347", "https://openalex.org/W2604268026", "https://openalex.org/W2746553466", "https://openalex.org/W2951799221", "https://openalex.org/W2952509347", "https://openalex.org/W2952523895", "https://openalex.org/W2963871073", "https://openalex.org/W2964006217", "https://openalex.org/W2964161785", "https://openalex.org/W3103780890", "https://openalex.org/W4297669414", "https://openalex.org/W4298857966", "https://openalex.org/W6605846256", "https://openalex.org/W6666761814", "https://openalex.org/W6683508713", "https://openalex.org/W6736309856"], "related_works": ["https://openalex.org/W1496222301", "https://openalex.org/W3207760230", "https://openalex.org/W1590307681", "https://openalex.org/W4312814274", "https://openalex.org/W4285370786", "https://openalex.org/W2296488620", "https://openalex.org/W2358353312", "https://openalex.org/W2353836703", "https://openalex.org/W41015297", "https://openalex.org/W4280645561"], "abstract_inverted_index": {"Advances": [0], "in": [1, 33, 55, 167], "deep": [2, 65], "reinforcement": [3, 66], "learning": [4, 67], "have": [5], "allowed": [6], "autonomous": [7], "agents": [8, 158], "to": [9, 22, 40, 51, 79, 83, 104, 118, 134, 138], "perform": [10], "well": [11, 163], "on": [12], "Atari": [13], "games,": [14, 58], "often": [15], "outperforming": [16], "humans,": [17], "using": [18], "only": [19, 69], "raw": [20], "pixels": [21], "make": [23], "their": [24], "decisions.": [25], "However,": [26], "most": [27], "of": [28, 92, 126, 145, 159], "these": [29, 81, 107], "games": [30], "take": [31], "place": [32], "2D": [34], "environments": [35, 54], "that": [36, 59, 150], "are": [37], "fully": [38], "observable": [39, 62], "the": [41, 48, 90, 97, 121, 146, 151, 160], "agent.": [42, 128], "In": [43], "this": [44], "paper,": [45], "we": [46], "present": [47, 76], "first": [49], "architecture": [50, 130, 153], "tackle": [52], "3D": [53], "first-person": [56], "shooter": [57], "involve": [60], "partially": [61], "states.": [63], "Typically,": [64], "methods": [68], "utilize": [70], "visual": [71], "input": [72], "for": [73, 142], "training.": [74], "We": [75, 148], "a": [77, 112], "method": [78], "augment": [80], "models": [82, 137], "exploit": [84], "game": [85, 161], "feature": [86], "information": [87], "such": [88], "as": [89, 162, 164], "presence": [91], "enemies": [93], "or": [94], "items,": [95], "during": [96], "training": [98, 122], "phase.": [99], "Our": [100, 129], "model": [101], "is": [102, 116, 131], "trained": [103, 141], "simultaneously": [105], "learn": [106], "features": [108], "along": [109], "with": [110], "minimizing": [111], "Q-learning": [113], "objective,": [114], "which": [115], "shown": [117], "dramatically": [119], "improve": [120], "speed": [123], "and": [124], "performance": [125], "our": [127], "also": [132], "modularized": [133], "allow": [135], "different": [136, 143], "be": [139], "independently": [140], "phases": [144], "game.": [147], "show": [149], "proposed": [152], "substantially": [154], "outperforms": [155], "built-in": [156], "AI": [157], "average": [165], "humans": [166], "deathmatch": [168], "scenarios.": [169]}, "counts_by_year": [{"year": 2026, "cited_by_count": 8}, {"year": 2025, "cited_by_count": 26}, {"year": 2024, "cited_by_count": 49}, {"year": 2023, "cited_by_count": 65}, {"year": 2022, "cited_by_count": 49}, {"year": 2021, "cited_by_count": 54}, {"year": 2020, "cited_by_count": 58}, {"year": 2019, "cited_by_count": 56}, {"year": 2018, "cited_by_count": 58}, {"year": 2017, "cited_by_count": 26}, {"year": 2016, "cited_by_count": 1}], "updated_date": "2026-07-02T09:51:11.867554", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2803281228", "doi": "https://doi.org/10.48550/arxiv.1805.08296", "title": "Data-Efficient Hierarchical Reinforcement Learning", "display_name": "Data-Efficient Hierarchical Reinforcement Learning", "relevance_score": 318.18167, "publication_year": 2018, "publication_date": "2018-05-21", "ids": {"openalex": "https://openalex.org/W2803281228", "doi": "https://doi.org/10.48550/arxiv.1805.08296", "mag": "2803281228"}, "language": "en", "primary_location": {"id": "pmh:oai:arXiv.org:1805.08296", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1805.08296", "pdf_url": "https://arxiv.org/pdf/1805.08296", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "type": "article", "indexed_in": ["arxiv", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1805.08296", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5057773393", "display_name": "Ofir Nachum", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Nachum, Ofir", "raw_affiliation_strings": ["Google LLC"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google LLC", "institution_ids": ["https://openalex.org/I1291425158"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5061613634", "display_name": "Shixiang Gu", "orcid": null}, "institutions": [{"id": "https://openalex.org/I149899117", "display_name": "Max Planck Society", "ror": "https://ror.org/01hhn8329", "country_code": "DE", "type": "nonprofit", "lineage": ["https://openalex.org/I149899117"]}, {"id": "https://openalex.org/I4210135521", "display_name": "Max Planck Institute for Intelligent Systems", "ror": "https://ror.org/04fq9j139", "country_code": "DE", "type": "facility", "lineage": ["https://openalex.org/I149899117", "https://openalex.org/I4210135521"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Gu, Shixiang", "raw_affiliation_strings": ["Dept. Empirical Inference, Max Planck Institute for Intelligent Systems, Max Planck Society"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Dept. Empirical Inference, Max Planck Institute for Intelligent Systems, Max Planck Society", "institution_ids": ["https://openalex.org/I4210135521", "https://openalex.org/I149899117"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5108652283", "display_name": "Honglak Lee", "orcid": "https://orcid.org/0000-0002-4109-327X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Lee, Honglak", "raw_affiliation_strings": ["Google LLC"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google LLC", "institution_ids": ["https://openalex.org/I1291425158"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Levine, Sergey", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 3, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": false, "cited_by_count": 265, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": "31", "issue": null, "first_page": "3303", "last_page": "3313"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11689", "display_name": "Adversarial Robustness in Machine Learning", "score": 0.9453999996185303, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9336000084877014, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/generality", "display_name": "Generality", "score": 0.9141421914100647}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.907731831073761}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7623730897903442}, {"id": "https://openalex.org/keywords/task", "display_name": "Task (project management)", "score": 0.64748615026474}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6188674569129944}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5258208513259888}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.49584469199180603}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4847871661186218}, {"id": "https://openalex.org/keywords/scheme", "display_name": "Scheme (mathematics)", "score": 0.46986114978790283}, {"id": "https://openalex.org/keywords/action", "display_name": "Action (physics)", "score": 0.44638606905937195}, {"id": "https://openalex.org/keywords/policy-learning", "display_name": "Policy learning", "score": 0.41278672218322754}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.09693843126296997}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.0961247980594635}], "concepts": [{"id": "https://openalex.org/C2780767217", "wikidata": "https://www.wikidata.org/wiki/Q5532421", "display_name": "Generality", "level": 2, "score": 0.9141421914100647}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.907731831073761}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7623730897903442}, {"id": "https://openalex.org/C2780451532", "wikidata": "https://www.wikidata.org/wiki/Q759676", "display_name": "Task (project management)", "level": 2, "score": 0.64748615026474}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6188674569129944}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5258208513259888}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.49584469199180603}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4847871661186218}, {"id": "https://openalex.org/C77618280", "wikidata": "https://www.wikidata.org/wiki/Q1155772", "display_name": "Scheme (mathematics)", "level": 2, "score": 0.46986114978790283}, {"id": "https://openalex.org/C2780791683", "wikidata": "https://www.wikidata.org/wiki/Q846785", "display_name": "Action (physics)", "level": 2, "score": 0.44638606905937195}, {"id": "https://openalex.org/C2779436431", "wikidata": "https://www.wikidata.org/wiki/Q30672407", "display_name": "Policy learning", "level": 2, "score": 0.41278672218322754}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.09693843126296997}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.0961247980594635}, {"id": "https://openalex.org/C62520636", "wikidata": "https://www.wikidata.org/wiki/Q944", "display_name": "Quantum mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C542102704", "wikidata": "https://www.wikidata.org/wiki/Q183257", "display_name": "Psychotherapist", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C201995342", "wikidata": "https://www.wikidata.org/wiki/Q682496", "display_name": "Systems engineering", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C15744967", "wikidata": "https://www.wikidata.org/wiki/Q9418", "display_name": "Psychology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 5, "locations": [{"id": "pmh:oai:arXiv.org:1805.08296", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1805.08296", "pdf_url": "https://arxiv.org/pdf/1805.08296", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "mag:2950614095", "is_oa": true, "landing_page_url": "http://export.arxiv.org/pdf/1805.08296", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "arXiv (Cornell University)", "raw_type": null}, {"id": "pmh:oai:pure.mpg.de:item_3049522", "is_oa": false, "landing_page_url": "http://hdl.handle.net/21.11116/0000-0003-72EA-5", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400654", "display_name": "MPG.PuRe (Max Planck Society)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I149899117", "host_organization_name": "Max Planck Society", "host_organization_lineage": ["https://openalex.org/I149899117"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Advances in Neural Information Processing Systems 31", "raw_type": "info:eu-repo/semantics/conferenceObject"}, {"id": "doi:10.48550/arxiv.1805.08296", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1805.08296", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "Preprint"}, {"id": "mag:2803281228", "is_oa": false, "landing_page_url": "http://papers.nips.cc/paper/7591-data-efficient-hierarchical-reinforcement-learning.pdf", "pdf_url": null, "source": {"id": "https://openalex.org/S4363606243", "display_name": "neural information processing systems", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "journal"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": "neural information processing systems", "raw_type": null}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1805.08296", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1805.08296", "pdf_url": "https://arxiv.org/pdf/1805.08296", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 3, "referenced_works": ["https://openalex.org/W1534480106", "https://openalex.org/W2335959470", "https://openalex.org/W2733961795"], "related_works": ["https://openalex.org/W567721252", "https://openalex.org/W2964227312", "https://openalex.org/W2964001908", "https://openalex.org/W2963262099", "https://openalex.org/W2736601468", "https://openalex.org/W2594829461", "https://openalex.org/W2173248099", "https://openalex.org/W2160371091", "https://openalex.org/W2158782408", "https://openalex.org/W2145339207", "https://openalex.org/W2121863487", "https://openalex.org/W2109910161", "https://openalex.org/W2963438456", "https://openalex.org/W1771410628", "https://openalex.org/W2964161785", "https://openalex.org/W2964043796", "https://openalex.org/W2257979135", "https://openalex.org/W2964121744", "https://openalex.org/W1757796397", "https://openalex.org/W2121517924"], "abstract_inverted_index": {"Hierarchical": [0], "reinforcement": [1, 11], "learning": [2, 12, 234], "(HRL)": [3], "is": [4, 201], "a": [5, 99, 135, 237, 243, 252], "promising": [6], "approach": [7, 262], "to": [8, 15, 37, 123, 140, 158, 165, 175, 215, 230, 242], "extend": [9], "traditional": [10], "(RL)": [13], "methods": [14, 26], "solve": [16], "more": [17], "complex": [18, 218], "tasks.": [19], "Yet,": [20], "the": [21, 72, 115, 141, 145, 149, 192], "majority": [22], "of": [23, 82, 168, 246, 254], "current": [24], "HRL": [25, 51, 194, 256], "require": [27], "careful": [28], "task-specific": [29], "design": [30], "and": [31, 69, 111, 130, 152, 179, 197, 204, 227], "on-policy": [32, 188], "training,": [33], "making": [34, 85], "them": [35, 86, 229], "difficult": [36], "apply": [38], "in": [39, 56, 71, 171], "real-world": [40, 89], "scenarios.": [41], "In": [42, 249], "this": [43, 160], "paper,": [44], "we": [45, 48, 97, 121, 153, 258], "study": [46], "how": [47], "can": [49, 76, 212], "develop": [50, 98], "algorithms": [52], "that": [53, 57, 74, 108, 199, 210, 260], "are": [54, 104, 109], "general,": [55], "they": [58, 75], "do": [59], "not": [60], "make": [61], "onerous": [62], "additional": [63], "assumptions": [64], "beyond": [65], "standard": [66], "RL": [67, 174], "algorithms,": [68], "efficient,": [70], "sense": [73], "be": [77, 213], "used": [78, 214], "with": [79, 106, 251], "modest": [80], "numbers": [81], "interaction": [83], "samples,": [84, 240], "suitable": [87], "for": [88, 127, 148, 220], "problems": [90], "such": [91, 223], "as": [92, 224], "robotic": [93], "control.": [94], "For": [95], "generality,": [96], "scheme": [100], "where": [101], "lower-level": [102, 131, 142, 180], "controllers": [103], "supervised": [105], "goals": [107], "learned": [110], "proposed": [112], "automatically": [113], "by": [114], "higher-level": [116, 150], "controllers.": [117], "To": [118], "address": [119], "efficiency,": [120], "propose": [122], "use": [124], "off-policy": [125, 156, 172], "experience": [126], "both": [128, 177], "higher": [129], "training.": [132], "This": [133, 162], "poses": [134], "considerable": [136], "challenge,": [137], "since": [138], "changes": [139], "behaviors": [143, 219], "change": [144], "action": [146], "space": [147], "policy,": [151], "introduce": [154], "an": [155], "correction": [157], "remedy": [159], "challenge.": [161], "allows": [163], "us": [164], "take": [166], "advantage": [167], "recent": [169], "advances": [170], "model-free": [173], "learn": [176, 216], "higher-": [178], "policies": [181], "using": [182], "substantially": [183, 263], "fewer": [184], "environment": [185], "interactions": [186], "than": [187], "algorithms.": [189], "We": [190], "term": [191], "resulting": [193], "agent": [195], "HIRO": [196, 211], "find": [198, 259], "it": [200], "generally": [202], "applicable": [203], "highly": [205, 217], "sample-efficient.": [206], "Our": [207], "experiments": [208], "show": [209], "simulated": [221], "robots,": [222], "pushing": [225], "objects": [226], "utilizing": [228], "reach": [231], "target": [232], "locations,": [233], "from": [235], "only": [236], "few": [238, 244], "million": [239], "equivalent": [241], "days": [245], "real-time": [247], "interaction.": [248], "comparisons": [250], "number": [253], "prior": [255], "methods,": [257], "our": [261], "outperforms": [264], "previous": [265], "state-of-the-art": [266], "techniques.": [267]}, "counts_by_year": [{"year": 2025, "cited_by_count": 6}, {"year": 2024, "cited_by_count": 13}, {"year": 2023, "cited_by_count": 26}, {"year": 2022, "cited_by_count": 20}, {"year": 2021, "cited_by_count": 80}, {"year": 2020, "cited_by_count": 74}, {"year": 2019, "cited_by_count": 43}, {"year": 2018, "cited_by_count": 3}], "updated_date": "2026-07-01T06:00:48.157686", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4205837126", "doi": "https://doi.org/10.3390/app12020937", "title": "Smart Industrial Robot Control Trends, Challenges and Opportunities within Manufacturing", "display_name": "Smart Industrial Robot Control Trends, Challenges and Opportunities within Manufacturing", "relevance_score": 315.64594, "publication_year": 2022, "publication_date": "2022-01-17", "ids": {"openalex": "https://openalex.org/W4205837126", "doi": "https://doi.org/10.3390/app12020937"}, "language": "en", "primary_location": {"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5054621323", "display_name": "J\u0101nis \u0100rents", "orcid": "https://orcid.org/0000-0001-5203-3347"}, "institutions": [{"id": "https://openalex.org/I4210162447", "display_name": "Institute of Electronics and Computer Science", "ror": "https://ror.org/05bsp2531", "country_code": "LV", "type": "facility", "lineage": ["https://openalex.org/I4210162447", "https://openalex.org/I70055295"]}], "countries": ["LV"], "is_corresponding": true, "raw_author_name": "Janis Arents", "raw_affiliation_strings": ["Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia"], "raw_orcid": "https://orcid.org/0000-0001-5203-3347", "affiliations": [{"raw_affiliation_string": "Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia", "institution_ids": ["https://openalex.org/I4210162447"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5072892931", "display_name": "Modris Greit\u0101ns", "orcid": "https://orcid.org/0000-0002-5405-0738"}, "institutions": [{"id": "https://openalex.org/I4210162447", "display_name": "Institute of Electronics and Computer Science", "ror": "https://ror.org/05bsp2531", "country_code": "LV", "type": "facility", "lineage": ["https://openalex.org/I4210162447", "https://openalex.org/I70055295"]}], "countries": ["LV"], "is_corresponding": false, "raw_author_name": "Modris Greitans", "raw_affiliation_strings": ["Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia"], "raw_orcid": "https://orcid.org/0000-0002-5405-0738", "affiliations": [{"raw_affiliation_string": "Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia", "institution_ids": ["https://openalex.org/I4210162447"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5054621323"], "corresponding_institution_ids": ["https://openalex.org/I4210162447"], "apc_list": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "apc_paid": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "fwci": 30.6997, "has_fulltext": false, "cited_by_count": 295, "citation_normalized_percentile": {"value": 0.99940468, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "12", "issue": "2", "first_page": "937", "last_page": "937"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12111", "display_name": "Industrial Vision Systems and Defect Detection", "score": 0.9890999794006348, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10763", "display_name": "Digital Transformation in Industry", "score": 0.9733999967575073, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5922507047653198}, {"id": "https://openalex.org/keywords/flexibility", "display_name": "Flexibility (engineering)", "score": 0.5631266832351685}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.46441027522087097}, {"id": "https://openalex.org/keywords/field", "display_name": "Field (mathematics)", "score": 0.46021947264671326}, {"id": "https://openalex.org/keywords/smart-manufacturing", "display_name": "Smart manufacturing", "score": 0.4564518928527832}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.42232292890548706}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4212660491466522}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.41628211736679077}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.36408108472824097}, {"id": "https://openalex.org/keywords/manufacturing-engineering", "display_name": "Manufacturing engineering", "score": 0.31831610202789307}, {"id": "https://openalex.org/keywords/management", "display_name": "Management", "score": 0.07213842868804932}], "concepts": [{"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5922507047653198}, {"id": "https://openalex.org/C2780598303", "wikidata": "https://www.wikidata.org/wiki/Q65921492", "display_name": "Flexibility (engineering)", "level": 2, "score": 0.5631266832351685}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.46441027522087097}, {"id": "https://openalex.org/C9652623", "wikidata": "https://www.wikidata.org/wiki/Q190109", "display_name": "Field (mathematics)", "level": 2, "score": 0.46021947264671326}, {"id": "https://openalex.org/C2988642114", "wikidata": "https://www.wikidata.org/wiki/Q25112020", "display_name": "Smart manufacturing", "level": 2, "score": 0.4564518928527832}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.42232292890548706}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4212660491466522}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.41628211736679077}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.36408108472824097}, {"id": "https://openalex.org/C117671659", "wikidata": "https://www.wikidata.org/wiki/Q11049265", "display_name": "Manufacturing engineering", "level": 1, "score": 0.31831610202789307}, {"id": "https://openalex.org/C187736073", "wikidata": "https://www.wikidata.org/wiki/Q2920921", "display_name": "Management", "level": 1, "score": 0.07213842868804932}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, {"id": "pmh:oai:doaj.org/article:51d3cb9089504cc1b4d66cdfdaa331be", "is_oa": true, "landing_page_url": "https://doaj.org/article/51d3cb9089504cc1b4d66cdfdaa331be", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences, Vol 12, Iss 2, p 937 (2022)", "raw_type": "article"}, {"id": "pmh:oai:mdpi.com:/2076-3417/12/2/937/", "is_oa": true, "landing_page_url": "https://dx.doi.org/10.3390/app12020937", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400947", "display_name": "MDPI (MDPI AG)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I4210097602", "host_organization_name": "Multidisciplinary Digital Publishing Institute (Switzerland)", "host_organization_lineage": ["https://openalex.org/I4210097602"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences; Volume 12; Issue 2; Pages: 937", "raw_type": "Text"}], "best_oa_location": {"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "sustainable_development_goals": [{"display_name": "Industry, innovation and infrastructure", "id": "https://metadata.un.org/sdg/9", "score": 0.6000000238418579}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W4205837126.pdf"}, "referenced_works_count": 110, "referenced_works": ["https://openalex.org/W639708223", "https://openalex.org/W1258130145", "https://openalex.org/W1481659984", "https://openalex.org/W1606048812", "https://openalex.org/W1977655452", "https://openalex.org/W1988785119", "https://openalex.org/W1991544872", "https://openalex.org/W1999156278", "https://openalex.org/W2012392077", "https://openalex.org/W2044165158", "https://openalex.org/W2074220104", "https://openalex.org/W2089806275", "https://openalex.org/W2132862423", "https://openalex.org/W2174644224", "https://openalex.org/W2205414745", "https://openalex.org/W2257979135", "https://openalex.org/W2570343428", "https://openalex.org/W2604236302", "https://openalex.org/W2604726708", "https://openalex.org/W2604736759", "https://openalex.org/W2615190535", "https://openalex.org/W2766196346", "https://openalex.org/W2767346351", "https://openalex.org/W2769112066", "https://openalex.org/W2771075179", "https://openalex.org/W2771896380", "https://openalex.org/W2793432429", "https://openalex.org/W2794908222", "https://openalex.org/W2891372393", "https://openalex.org/W2901362978", "https://openalex.org/W2902780211", "https://openalex.org/W2907108913", "https://openalex.org/W2910474428", "https://openalex.org/W2910788814", "https://openalex.org/W2912196091", "https://openalex.org/W2913358857", "https://openalex.org/W2919358988", "https://openalex.org/W2958510209", "https://openalex.org/W2962736495", "https://openalex.org/W2962759351", "https://openalex.org/W2962793652", "https://openalex.org/W2963033241", "https://openalex.org/W2963188159", "https://openalex.org/W2963669336", "https://openalex.org/W2964239605", "https://openalex.org/W2964249569", "https://openalex.org/W2964333597", "https://openalex.org/W2967727187", "https://openalex.org/W2968268581", "https://openalex.org/W2969113429", "https://openalex.org/W2970520611", "https://openalex.org/W2976205474", "https://openalex.org/W2979417040", "https://openalex.org/W2980888398", "https://openalex.org/W2981697369", "https://openalex.org/W2984673978", "https://openalex.org/W2984754495", "https://openalex.org/W2993476843", "https://openalex.org/W2995284451", "https://openalex.org/W3003620461", "https://openalex.org/W3004047800", "https://openalex.org/W3008535267", "https://openalex.org/W3010515602", "https://openalex.org/W3010834209", "https://openalex.org/W3016690394", "https://openalex.org/W3016969588", "https://openalex.org/W3023241112", "https://openalex.org/W3028308378", "https://openalex.org/W3028964554", "https://openalex.org/W3033312324", "https://openalex.org/W3044115425", "https://openalex.org/W3046735138", "https://openalex.org/W3046748421", "https://openalex.org/W3082854280", "https://openalex.org/W3088013802", "https://openalex.org/W3088158297", "https://openalex.org/W3089580269", "https://openalex.org/W3092037789", "https://openalex.org/W3096973715", "https://openalex.org/W3099982022", "https://openalex.org/W3100172161", "https://openalex.org/W3100789280", "https://openalex.org/W3101103779", "https://openalex.org/W3112160583", "https://openalex.org/W3122928565", "https://openalex.org/W3125366729", "https://openalex.org/W3127352841", "https://openalex.org/W3128707967", "https://openalex.org/W3131225419", "https://openalex.org/W3131554325", "https://openalex.org/W3131966994", "https://openalex.org/W3133743901", "https://openalex.org/W3133750590", "https://openalex.org/W3136021864", "https://openalex.org/W3161970973", "https://openalex.org/W3167898434", "https://openalex.org/W3181223963", "https://openalex.org/W3194459689", "https://openalex.org/W4242227498", "https://openalex.org/W4252782962", "https://openalex.org/W4312562390", "https://openalex.org/W4313031817", "https://openalex.org/W6620707391", "https://openalex.org/W6622324178", "https://openalex.org/W6668794633", "https://openalex.org/W6721962699", "https://openalex.org/W6764969207", "https://openalex.org/W6771689172", "https://openalex.org/W6777660902", "https://openalex.org/W6795768249"], "related_works": ["https://openalex.org/W1508899372", "https://openalex.org/W4236696095", "https://openalex.org/W3143779693", "https://openalex.org/W2012658348", "https://openalex.org/W3013410248", "https://openalex.org/W2910904538", "https://openalex.org/W3092604565", "https://openalex.org/W4214836412", "https://openalex.org/W2770239401", "https://openalex.org/W2907002303"], "abstract_inverted_index": {"Industrial": [0], "robots": [1, 32], "and": [2, 28, 50, 86, 106, 115, 122, 132], "associated": [3], "control": [4, 24, 75, 120], "methods": [5, 107], "are": [6, 38, 57, 100, 108, 127, 135], "continuously": [7], "developing.": [8], "With": [9], "the": [10, 14, 43, 59, 69, 83, 88, 96, 138], "recent": [11], "progress": [12], "in": [13, 21, 76, 125], "field": [15], "of": [16, 42, 46, 53, 71, 91], "artificial": [17], "intelligence,": [18], "new": [19], "perspectives": [20], "industrial": [22, 73, 93], "robot": [23, 74, 119], "strategies": [25, 105], "have": [26, 33], "emerged,": [27], "prospects": [29], "towards": [30, 78], "cognitive": [31], "arisen.": [34], "AI-based": [35], "robotic": [36], "systems": [37], "strongly": [39], "becoming": [40, 58], "one": [41], "main": [44], "areas": [45], "focus,": [47], "as": [48], "flexibility": [49], "deep": [51, 112], "understanding": [52], "complex": [54], "manufacturing": [55, 77, 126], "processes": [56], "key": [60], "advantage": [61], "to": [62], "raise": [63], "competitiveness.": [64], "This": [65], "review": [66], "first": [67], "expresses": [68], "significance": [70], "smart": [72, 92], "future": [79], "factories": [80], "by": [81], "listing": [82], "needs,": [84], "requirements": [85], "introducing": [87], "envisioned": [89], "concept": [90], "robots.": [94], "Secondly,": [95], "current": [97], "trends": [98], "that": [99], "based": [101, 118], "on": [102], "different": [103], "learning": [104, 114, 117], "explored.": [109], "Current": [110], "computer-vision,": [111], "reinforcement": [113], "imitation": [116], "approaches": [121], "possible": [123], "applications": [124], "investigated.": [128], "Gaps,": [129], "challenges,": [130], "limitations": [131], "open": [133], "issues": [134], "identified": [136], "along": [137], "way.": [139]}, "counts_by_year": [{"year": 2026, "cited_by_count": 33}, {"year": 2025, "cited_by_count": 100}, {"year": 2024, "cited_by_count": 83}, {"year": 2023, "cited_by_count": 51}, {"year": 2022, "cited_by_count": 28}], "updated_date": "2026-07-04T07:58:01.006859", "created_date": "2022-01-26T00:00:00"}, {"id": "https://openalex.org/W2897661175", "doi": "https://doi.org/10.1016/j.arcontrol.2018.09.005", "title": "Reinforcement learning for control: Performance, stability, and deep approximators", "display_name": "Reinforcement learning for control: Performance, stability, and deep approximators", "relevance_score": 310.8046, "publication_year": 2018, "publication_date": "2018-01-01", "ids": {"openalex": "https://openalex.org/W2897661175", "doi": "https://doi.org/10.1016/j.arcontrol.2018.09.005", "mag": "2897661175"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.arcontrol.2018.09.005", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.arcontrol.2018.09.005", "pdf_url": "https://www.sciencedirect.com/science/article/pii/S1367578818301184", "source": {"id": "https://openalex.org/S54761077", "display_name": "Annual Reviews in Control", "issn_l": "1367-5788", "issn": ["1367-5788", "1872-9088"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Reviews in Control", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "bronze", "oa_url": "https://www.sciencedirect.com/science/article/pii/S1367578818301184", "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5058935509", "display_name": "Lucian Bu\u015foniu", "orcid": "https://orcid.org/0000-0001-8017-1296"}, "institutions": [{"id": "https://openalex.org/I158333966", "display_name": "Technical University of Cluj-Napoca", "ror": "https://ror.org/03r8nwp71", "country_code": "RO", "type": "education", "lineage": ["https://openalex.org/I158333966"]}], "countries": ["RO"], "is_corresponding": true, "raw_author_name": "Lucian Bu\u015foniu", "raw_affiliation_strings": ["Technical University of Cluj-Napoca, Romania"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Technical University of Cluj-Napoca, Romania", "institution_ids": ["https://openalex.org/I158333966"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5033347648", "display_name": "Tim de Bruin", "orcid": "https://orcid.org/0000-0003-1741-0850"}, "institutions": [{"id": "https://openalex.org/I98358874", "display_name": "Delft University of Technology", "ror": "https://ror.org/02e2c7k09", "country_code": "NL", "type": "education", "lineage": ["https://openalex.org/I98358874"]}], "countries": ["NL"], "is_corresponding": false, "raw_author_name": "Tim de Bruin", "raw_affiliation_strings": ["Delft University of Technology, the Netherlands"], "raw_orcid": "https://orcid.org/0000-0003-1741-0850", "affiliations": [{"raw_affiliation_string": "Delft University of Technology, the Netherlands", "institution_ids": ["https://openalex.org/I98358874"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5013432227", "display_name": "Domagoj Toli\u0107", "orcid": "https://orcid.org/0000-0002-0988-889X"}, "institutions": [{"id": "https://openalex.org/I2799440398", "display_name": "Rochester Institute of Technology Croatia", "ror": "https://ror.org/03jgxzm03", "country_code": "HR", "type": "education", "lineage": ["https://openalex.org/I2799440398"]}], "countries": ["HR"], "is_corresponding": false, "raw_author_name": "Domagoj Toli\u0107", "raw_affiliation_strings": ["RIT Croatia, Don Frana Buli\u0107a 6, Dubrovnik 20000, Croatia"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "RIT Croatia, Don Frana Buli\u0107a 6, Dubrovnik 20000, Croatia", "institution_ids": ["https://openalex.org/I2799440398"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5035229829", "display_name": "Jens Kober", "orcid": "https://orcid.org/0000-0001-7257-5434"}, "institutions": [{"id": "https://openalex.org/I98358874", "display_name": "Delft University of Technology", "ror": "https://ror.org/02e2c7k09", "country_code": "NL", "type": "education", "lineage": ["https://openalex.org/I98358874"]}], "countries": ["NL"], "is_corresponding": false, "raw_author_name": "Jens Kober", "raw_affiliation_strings": ["Delft University of Technology, the Netherlands"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Delft University of Technology, the Netherlands", "institution_ids": ["https://openalex.org/I98358874"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5089210338", "display_name": "Ivana Palunko", "orcid": "https://orcid.org/0000-0001-5204-2294"}, "institutions": [{"id": "https://openalex.org/I180629606", "display_name": "University of Dubrovnik", "ror": "https://ror.org/05yptqp13", "country_code": "HR", "type": "education", "lineage": ["https://openalex.org/I180629606"]}], "countries": ["HR"], "is_corresponding": false, "raw_author_name": "Ivana Palunko", "raw_affiliation_strings": ["University of Dubrovnik, \u0106ira Cari\u0107a 4, Dubrovnik 20000, Croatia"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Dubrovnik, \u0106ira Cari\u0107a 4, Dubrovnik 20000, Croatia", "institution_ids": ["https://openalex.org/I180629606"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 4, "corresponding_author_ids": ["https://openalex.org/A5058935509"], "corresponding_institution_ids": ["https://openalex.org/I158333966"], "apc_list": {"value": 3190, "currency": "USD", "value_usd": 3190}, "apc_paid": null, "fwci": 26.5647, "has_fulltext": true, "cited_by_count": 452, "citation_normalized_percentile": {"value": 0.99757378, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "46", "issue": null, "first_page": "8", "last_page": "28"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9966999888420105, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10603", "display_name": "Smart Grid Energy Management", "score": 0.9740999937057495, "subfield": {"id": "https://openalex.org/subfields/2208", "display_name": "Electrical and Electronic Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8671159744262695}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6396303176879883}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6285991668701172}, {"id": "https://openalex.org/keywords/stability", "display_name": "Stability (learning theory)", "score": 0.6061824560165405}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.5570380687713623}, {"id": "https://openalex.org/keywords/bridging", "display_name": "Bridging (networking)", "score": 0.5225008130073547}, {"id": "https://openalex.org/keywords/optimal-control", "display_name": "Optimal control", "score": 0.4995760917663574}, {"id": "https://openalex.org/keywords/field", "display_name": "Field (mathematics)", "score": 0.47261834144592285}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4725465774536133}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.41819262504577637}, {"id": "https://openalex.org/keywords/stochastic-control", "display_name": "Stochastic control", "score": 0.4164021611213684}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.3138045072555542}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.27853113412857056}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.17633822560310364}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8671159744262695}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6396303176879883}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6285991668701172}, {"id": "https://openalex.org/C112972136", "wikidata": "https://www.wikidata.org/wiki/Q7595718", "display_name": "Stability (learning theory)", "level": 2, "score": 0.6061824560165405}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.5570380687713623}, {"id": "https://openalex.org/C174348530", "wikidata": "https://www.wikidata.org/wiki/Q188635", "display_name": "Bridging (networking)", "level": 2, "score": 0.5225008130073547}, {"id": "https://openalex.org/C91575142", "wikidata": "https://www.wikidata.org/wiki/Q1971426", "display_name": "Optimal control", "level": 2, "score": 0.4995760917663574}, {"id": "https://openalex.org/C9652623", "wikidata": "https://www.wikidata.org/wiki/Q190109", "display_name": "Field (mathematics)", "level": 2, "score": 0.47261834144592285}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4725465774536133}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.41819262504577637}, {"id": "https://openalex.org/C170131372", "wikidata": "https://www.wikidata.org/wiki/Q7617811", "display_name": "Stochastic control", "level": 3, "score": 0.4164021611213684}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.3138045072555542}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.27853113412857056}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.17633822560310364}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C31258907", "wikidata": "https://www.wikidata.org/wiki/Q1301371", "display_name": "Computer network", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1016/j.arcontrol.2018.09.005", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.arcontrol.2018.09.005", "pdf_url": "https://www.sciencedirect.com/science/article/pii/S1367578818301184", "source": {"id": "https://openalex.org/S54761077", "display_name": "Annual Reviews in Control", "issn_l": "1367-5788", "issn": ["1367-5788", "1872-9088"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Reviews in Control", "raw_type": "journal-article"}, {"id": "pmh:oai:tudelft.nl:uuid:2d94c894-6450-40ee-84a9-36d284a0f195", "is_oa": false, "landing_page_url": "http://resolver.tudelft.nl/uuid:2d94c894-6450-40ee-84a9-36d284a0f195", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400906", "display_name": "Research Repository (Delft University of Technology)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I98358874", "host_organization_name": "Delft University of Technology", "host_organization_lineage": ["https://openalex.org/I98358874"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "review"}], "best_oa_location": {"id": "doi:10.1016/j.arcontrol.2018.09.005", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.arcontrol.2018.09.005", "pdf_url": "https://www.sciencedirect.com/science/article/pii/S1367578818301184", "source": {"id": "https://openalex.org/S54761077", "display_name": "Annual Reviews in Control", "issn_l": "1367-5788", "issn": ["1367-5788", "1872-9088"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Reviews in Control", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G2044052273", "display_name": null, "funder_award_id": "9/2018", "funder_id": "https://openalex.org/F4320323983", "funder_display_name": "Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"}, {"id": "https://openalex.org/G4460266531", "display_name": null, "funder_award_id": "9/2018", "funder_id": "https://openalex.org/F4320323444", "funder_display_name": "Autoritatea National\u0103 pentru Cercetare Stiintific\u0103"}, {"id": "https://openalex.org/G5391329560", "display_name": "Control of Dynamical Systems", "funder_award_id": "IP-2016-06-2468", "funder_id": "https://openalex.org/F4320322674", "funder_display_name": "Hrvatska Zaklada za Znanost"}, {"id": "https://openalex.org/G6058054535", "display_name": null, "funder_award_id": "PN-III-P1-1.1", "funder_id": "https://openalex.org/F4320323983", "funder_display_name": "Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"}, {"id": "https://openalex.org/G7688415803", "display_name": null, "funder_award_id": "PN-III-P1-1", "funder_id": "https://openalex.org/F4320323983", "funder_display_name": "Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"}, {"id": "https://openalex.org/G8459610185", "display_name": null, "funder_award_id": "PN-III-P1-1.1-TE-2016-0670", "funder_id": "https://openalex.org/F4320323983", "funder_display_name": "Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii"}], "funders": [{"id": "https://openalex.org/F4320321800", "display_name": "Nederlandse Organisatie voor Wetenschappelijk Onderzoek", "ror": "https://ror.org/04jsz6e67"}, {"id": "https://openalex.org/F4320322674", "display_name": "Hrvatska Zaklada za Znanost", "ror": "https://ror.org/03n51vw80"}, {"id": "https://openalex.org/F4320323444", "display_name": "Autoritatea National\u0103 pentru Cercetare Stiintific\u0103", "ror": "https://ror.org/03padf885"}, {"id": "https://openalex.org/F4320323983", "display_name": "Unitatea Executiva pentru Finantarea Invatamantului Superior, a Cercetarii, Dezvoltarii si Inovarii", "ror": "https://ror.org/01q7jq182"}], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2897661175.pdf", "grobid_xml": "https://content.openalex.org/works/W2897661175.grobid-xml"}, "referenced_works_count": 271, "referenced_works": ["https://openalex.org/W32403112", "https://openalex.org/W47171462", "https://openalex.org/W146900863", "https://openalex.org/W166862392", "https://openalex.org/W355724390", "https://openalex.org/W567721252", "https://openalex.org/W1037351197", "https://openalex.org/W1164749991", "https://openalex.org/W1191599655", "https://openalex.org/W1504212531", "https://openalex.org/W1508748501", "https://openalex.org/W1513016807", "https://openalex.org/W1514587017", "https://openalex.org/W1522842874", "https://openalex.org/W1529558080", "https://openalex.org/W1540560684", "https://openalex.org/W1547105496", "https://openalex.org/W1547925194", "https://openalex.org/W1549543673", "https://openalex.org/W1550698229", "https://openalex.org/W1552830313", "https://openalex.org/W1554698619", "https://openalex.org/W1557517019", "https://openalex.org/W1575592356", "https://openalex.org/W1576452626", "https://openalex.org/W1578936488", "https://openalex.org/W1579026795", "https://openalex.org/W1592847719", "https://openalex.org/W1601081659", "https://openalex.org/W1614417283", "https://openalex.org/W1625390266", "https://openalex.org/W1626155273", "https://openalex.org/W1631623342", "https://openalex.org/W1726806267", "https://openalex.org/W1745373831", "https://openalex.org/W1825869920", "https://openalex.org/W1850488217", "https://openalex.org/W1899249567", "https://openalex.org/W1899504021", "https://openalex.org/W1968962398", "https://openalex.org/W1977655452", "https://openalex.org/W1983362322", "https://openalex.org/W1988115241", "https://openalex.org/W1993411524", "https://openalex.org/W1998376807", "https://openalex.org/W1999912147", "https://openalex.org/W2009303086", "https://openalex.org/W2012501796", "https://openalex.org/W2012587148", "https://openalex.org/W2024780670", "https://openalex.org/W2027968610", "https://openalex.org/W2034005720", "https://openalex.org/W2038794597", "https://openalex.org/W2046376809", "https://openalex.org/W2048226872", "https://openalex.org/W2049934117", "https://openalex.org/W2052305027", "https://openalex.org/W2054938976", "https://openalex.org/W2056354534", "https://openalex.org/W2072931156", "https://openalex.org/W2073107347", "https://openalex.org/W2073384958", "https://openalex.org/W2091130426", "https://openalex.org/W2091565802", "https://openalex.org/W2094387729", "https://openalex.org/W2095487261", "https://openalex.org/W2096018174", "https://openalex.org/W2097451572", "https://openalex.org/W2098432798", "https://openalex.org/W2099618002", "https://openalex.org/W2099767582", "https://openalex.org/W2100495367", "https://openalex.org/W2101786389", "https://openalex.org/W2104733512", "https://openalex.org/W2107726111", "https://openalex.org/W2108682071", "https://openalex.org/W2113913482", "https://openalex.org/W2114599381", "https://openalex.org/W2117355432", "https://openalex.org/W2119567691", "https://openalex.org/W2119717200", "https://openalex.org/W2120346334", "https://openalex.org/W2121863487", "https://openalex.org/W2122410182", "https://openalex.org/W2125569215", "https://openalex.org/W2128163097", "https://openalex.org/W2129036575", "https://openalex.org/W2130599357", "https://openalex.org/W2130801532", "https://openalex.org/W2139418546", "https://openalex.org/W2141559645", "https://openalex.org/W2144446635", "https://openalex.org/W2145339207", "https://openalex.org/W2146444479", "https://openalex.org/W2148439597", "https://openalex.org/W2150339816", "https://openalex.org/W2150459019", "https://openalex.org/W2151661095", "https://openalex.org/W2152161277", "https://openalex.org/W2153267861", "https://openalex.org/W2153947321", "https://openalex.org/W2155007355", "https://openalex.org/W2155027007", "https://openalex.org/W2155968351", "https://openalex.org/W2156974606", "https://openalex.org/W2157701998", "https://openalex.org/W2158049833", "https://openalex.org/W2158738729", "https://openalex.org/W2160067530", "https://openalex.org/W2162807113", "https://openalex.org/W2165131254", "https://openalex.org/W2165150801", "https://openalex.org/W2165418472", "https://openalex.org/W2165726932", "https://openalex.org/W2168027532", "https://openalex.org/W2168405694", "https://openalex.org/W2168839459", "https://openalex.org/W2169106666", "https://openalex.org/W2172968643", "https://openalex.org/W2173248099", "https://openalex.org/W2174786457", "https://openalex.org/W2188721763", "https://openalex.org/W2236244207", "https://openalex.org/W2257979135", "https://openalex.org/W2284050935", "https://openalex.org/W2294100503", "https://openalex.org/W2399790246", "https://openalex.org/W2411690432", "https://openalex.org/W2419612459", "https://openalex.org/W2433379750", "https://openalex.org/W2440926996", "https://openalex.org/W2509374375", "https://openalex.org/W2520501711", "https://openalex.org/W2524958279", "https://openalex.org/W2525579820", "https://openalex.org/W2527081555", "https://openalex.org/W2529970964", "https://openalex.org/W2542673510", "https://openalex.org/W2547823007", "https://openalex.org/W2556958149", "https://openalex.org/W2557283755", "https://openalex.org/W2563378438", "https://openalex.org/W2563830277", "https://openalex.org/W2580175322", "https://openalex.org/W2586680856", "https://openalex.org/W2593766708", "https://openalex.org/W2605048551", "https://openalex.org/W2614801269", "https://openalex.org/W2619484182", "https://openalex.org/W2620671107", "https://openalex.org/W2733961795", "https://openalex.org/W2736601468", "https://openalex.org/W2737906791", "https://openalex.org/W2745868649", "https://openalex.org/W2749680651", "https://openalex.org/W2754517384", "https://openalex.org/W2760506156", "https://openalex.org/W2761873684", "https://openalex.org/W2765340762", "https://openalex.org/W2765650568", "https://openalex.org/W2766447205", "https://openalex.org/W2788837839", "https://openalex.org/W2790924949", "https://openalex.org/W2797163658", "https://openalex.org/W2892979040", "https://openalex.org/W2949117887", "https://openalex.org/W2949608212", "https://openalex.org/W2950220847", "https://openalex.org/W2950872548", "https://openalex.org/W2950929549", "https://openalex.org/W2951143668", "https://openalex.org/W2951781666", "https://openalex.org/W2962717849", "https://openalex.org/W2962749646", "https://openalex.org/W2962847657", "https://openalex.org/W2963019567", "https://openalex.org/W2963095800", "https://openalex.org/W2963254349", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2963634205", "https://openalex.org/W2963864421", "https://openalex.org/W2963982496", "https://openalex.org/W2964043796", "https://openalex.org/W2964161785", "https://openalex.org/W2964319760", "https://openalex.org/W3004707083", "https://openalex.org/W3011120880", "https://openalex.org/W3020075106", "https://openalex.org/W3020125231", "https://openalex.org/W3037932933", "https://openalex.org/W3103182070", "https://openalex.org/W3103559770", "https://openalex.org/W3123298421", "https://openalex.org/W3137695714", "https://openalex.org/W3139377883", "https://openalex.org/W4205513846", "https://openalex.org/W4205616158", "https://openalex.org/W4211221179", "https://openalex.org/W4213001083", "https://openalex.org/W4214717370", "https://openalex.org/W4229494842", "https://openalex.org/W4245108548", "https://openalex.org/W4245296547", "https://openalex.org/W4249441547", "https://openalex.org/W4285719527", "https://openalex.org/W4297791094", "https://openalex.org/W4298174377", "https://openalex.org/W4299494854", "https://openalex.org/W4307347247", "https://openalex.org/W4365800129", "https://openalex.org/W4376542976", "https://openalex.org/W4400518895", "https://openalex.org/W6606719070", "https://openalex.org/W6616173779", "https://openalex.org/W6623316541", "https://openalex.org/W6630225054", "https://openalex.org/W6630907848", "https://openalex.org/W6632591418", "https://openalex.org/W6632901617", "https://openalex.org/W6634413486", "https://openalex.org/W6636578284", "https://openalex.org/W6638018090", "https://openalex.org/W6638667902", "https://openalex.org/W6639039794", "https://openalex.org/W6639736602", "https://openalex.org/W6642419745", "https://openalex.org/W6674888299", "https://openalex.org/W6674974689", "https://openalex.org/W6675999342", "https://openalex.org/W6676023451", "https://openalex.org/W6676024505", "https://openalex.org/W6677549092", "https://openalex.org/W6677737365", "https://openalex.org/W6678500653", "https://openalex.org/W6679257226", "https://openalex.org/W6679630895", "https://openalex.org/W6680402377", "https://openalex.org/W6682262797", "https://openalex.org/W6682802936", "https://openalex.org/W6682849425", "https://openalex.org/W6682897353", "https://openalex.org/W6682953939", "https://openalex.org/W6683204974", "https://openalex.org/W6683356630", "https://openalex.org/W6683935339", "https://openalex.org/W6684205842", "https://openalex.org/W6684985892", "https://openalex.org/W6687306792", "https://openalex.org/W6687681856", "https://openalex.org/W6687688904", "https://openalex.org/W6688347243", "https://openalex.org/W6692846177", "https://openalex.org/W6693997783", "https://openalex.org/W6695676441", "https://openalex.org/W6696705104", "https://openalex.org/W6712922004", "https://openalex.org/W6715102896", "https://openalex.org/W6717230150", "https://openalex.org/W6718190810", "https://openalex.org/W6727147046", "https://openalex.org/W6729556111", "https://openalex.org/W6731672634", "https://openalex.org/W6734129231", "https://openalex.org/W6738741286", "https://openalex.org/W6738746681", "https://openalex.org/W6740801417", "https://openalex.org/W6742267698", "https://openalex.org/W6792155000", "https://openalex.org/W7039022430", "https://openalex.org/W7054112703"], "related_works": ["https://openalex.org/W2943897807", "https://openalex.org/W3120484221", "https://openalex.org/W4366198066", "https://openalex.org/W3047748938", "https://openalex.org/W2358522863", "https://openalex.org/W278441094", "https://openalex.org/W4386034604", "https://openalex.org/W3099285423", "https://openalex.org/W2336173978", "https://openalex.org/W2060950178"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 23}, {"year": 2025, "cited_by_count": 70}, {"year": 2024, "cited_by_count": 89}, {"year": 2023, "cited_by_count": 81}, {"year": 2022, "cited_by_count": 60}, {"year": 2021, "cited_by_count": 67}, {"year": 2020, "cited_by_count": 43}, {"year": 2019, "cited_by_count": 18}, {"year": 2018, "cited_by_count": 1}], "updated_date": "2026-07-03T08:13:44.112507", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4327571609", "doi": "https://doi.org/10.1016/j.artint.2023.103905", "title": "Safe multi-agent reinforcement learning for multi-robot control", "display_name": "Safe multi-agent reinforcement learning for multi-robot control", "relevance_score": 299.3828, "publication_year": 2023, "publication_date": "2023-03-16", "ids": {"openalex": "https://openalex.org/W4327571609", "doi": "https://doi.org/10.1016/j.artint.2023.103905"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "hybrid", "oa_url": "https://doi.org/10.1016/j.artint.2023.103905", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5062677595", "display_name": "Shangding Gu", "orcid": "https://orcid.org/0000-0002-2722-3779"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}, {"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["CN", "DE"], "is_corresponding": false, "raw_author_name": "Shangding Gu", "raw_affiliation_strings": ["Department of Computer Science, Technical University of Munich, Germany", "Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": "https://orcid.org/0000-0002-2722-3779", "affiliations": [{"raw_affiliation_string": "Department of Computer Science, Technical University of Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}, {"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5038889758", "display_name": "Jakub Grudzien Kuba", "orcid": null}, "institutions": [{"id": "https://openalex.org/I40120149", "display_name": "University of Oxford", "ror": "https://ror.org/052gg0110", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I40120149"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Jakub Grudzien Kuba", "raw_affiliation_strings": ["Department of Statistics, University of Oxford, UK"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Statistics, University of Oxford, UK", "institution_ids": ["https://openalex.org/I40120149"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5102989564", "display_name": "Yuanpei Chen", "orcid": "https://orcid.org/0000-0002-4674-553X"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yuanpei Chen", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5002080576", "display_name": "Yali Du", "orcid": "https://orcid.org/0000-0001-5683-2621"}, "institutions": [{"id": "https://openalex.org/I183935753", "display_name": "King's College London", "ror": "https://ror.org/0220mzb33", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I124357947", "https://openalex.org/I183935753"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Yali Du", "raw_affiliation_strings": ["Department of Informatics, King's College London, UK"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Informatics, King's College London, UK", "institution_ids": ["https://openalex.org/I183935753"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5069283448", "display_name": "Yang Long", "orcid": "https://orcid.org/0000-0001-7600-3396"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Long Yang", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5063781430", "display_name": "Alois Knoll", "orcid": "https://orcid.org/0000-0003-4840-076X"}, "institutions": [{"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Alois Knoll", "raw_affiliation_strings": ["Department of Computer Science, Technical University of Munich, Germany"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, Technical University of Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5090073634", "display_name": "Yaodong Yang", "orcid": "https://orcid.org/0000-0001-8132-5613"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Yaodong Yang", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 5, "corresponding_author_ids": ["https://openalex.org/A5090073634"], "corresponding_institution_ids": ["https://openalex.org/I20231570", "https://openalex.org/I4210100255"], "apc_list": {"value": 3670, "currency": "USD", "value_usd": 3670}, "apc_paid": {"value": 3670, "currency": "USD", "value_usd": 3670}, "fwci": 19.3642, "has_fulltext": false, "cited_by_count": 121, "citation_normalized_percentile": {"value": 0.99529698, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 100}, "biblio": {"volume": "319", "issue": null, "first_page": "103905", "last_page": "103905"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9901000261306763, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9839000105857849, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.883181095123291}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6126446723937988}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5663607716560364}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.5441109538078308}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.48300397396087646}, {"id": "https://openalex.org/keywords/state", "display_name": "State (computer science)", "score": 0.449796199798584}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.44975346326828003}, {"id": "https://openalex.org/keywords/marl", "display_name": "Marl", "score": 0.44429540634155273}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.386458158493042}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.17244932055473328}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.13461393117904663}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.883181095123291}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6126446723937988}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5663607716560364}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.5441109538078308}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.48300397396087646}, {"id": "https://openalex.org/C48103436", "wikidata": "https://www.wikidata.org/wiki/Q599031", "display_name": "State (computer science)", "level": 2, "score": 0.449796199798584}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.44975346326828003}, {"id": "https://openalex.org/C92927620", "wikidata": "https://www.wikidata.org/wiki/Q184053", "display_name": "Marl", "level": 3, "score": 0.44429540634155273}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.386458158493042}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.17244932055473328}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.13461393117904663}, {"id": "https://openalex.org/C151730666", "wikidata": "https://www.wikidata.org/wiki/Q7205", "display_name": "Paleontology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C109007969", "wikidata": "https://www.wikidata.org/wiki/Q749565", "display_name": "Structural basin", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, {"id": "pmh:oai:kclpure.kcl.ac.uk:openaire/4b6e2578-0b6d-455a-84f9-b317b80838cc", "is_oa": true, "landing_page_url": "https://kclpure.kcl.ac.uk/portal/en/publications/4b6e2578-0b6d-455a-84f9-b317b80838cc", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400216", "display_name": "Research Portal (King's College London)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I183935753", "host_organization_name": "King's College London", "host_organization_lineage": ["https://openalex.org/I183935753"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Gu, S, Grudzien Kuba, J, Chen, Y, Du, Y, Yang, L, Knoll, A & Yang, Y 2023, 'Safe multi-agent reinforcement learning for multi-robot control', ARTIFICIAL INTELLIGENCE, vol. 319, 103905. https://doi.org/10.1016/j.artint.2023.103905", "raw_type": "info:eu-repo/semantics/article"}, {"id": "pmh:oai:mediatum.ub.tum.de:node/1702843", "is_oa": false, "landing_page_url": "https://mediatum.ub.tum.de/1702843", "pdf_url": null, "source": {"id": "https://openalex.org/S4377196330", "display_name": "mediaTUM  (Technical University of Munich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I62916508", "host_organization_name": "Technical University of Munich", "host_organization_lineage": ["https://openalex.org/I62916508"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "article"}], "best_oa_location": {"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, "sustainable_development_goals": [{"display_name": "Partnerships for the goals", "id": "https://metadata.un.org/sdg/17", "score": 0.44999998807907104}], "awards": [{"id": "https://openalex.org/G2730713857", "display_name": null, "funder_award_id": "945539", "funder_id": "https://openalex.org/F4320335254", "funder_display_name": "Horizon 2020"}, {"id": "https://openalex.org/G3506733245", "display_name": null, "funder_award_id": "Z221100003422004", "funder_id": "https://openalex.org/F4320325902", "funder_display_name": "Beijing Municipal Science and Technology Commission"}], "funders": [{"id": "https://openalex.org/F4320325902", "display_name": "Beijing Municipal Science and Technology Commission", "ror": null}, {"id": "https://openalex.org/F4320335254", "display_name": "Horizon 2020", "ror": null}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 47, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W1845972764", "https://openalex.org/W2040871222", "https://openalex.org/W2050835671", "https://openalex.org/W2128410140", "https://openalex.org/W2234962923", "https://openalex.org/W2257979135", "https://openalex.org/W2291649624", "https://openalex.org/W2560504659", "https://openalex.org/W2604213426", "https://openalex.org/W2915117209", "https://openalex.org/W2950430092", "https://openalex.org/W2954295423", "https://openalex.org/W3032916997", "https://openalex.org/W3048871122", "https://openalex.org/W3157410348", "https://openalex.org/W3162902207", "https://openalex.org/W3173294282", "https://openalex.org/W3195968524", "https://openalex.org/W3196692929", "https://openalex.org/W3200561352", "https://openalex.org/W3201466613", "https://openalex.org/W4233813419", "https://openalex.org/W4250331344", "https://openalex.org/W4292313830", "https://openalex.org/W4294562617", "https://openalex.org/W4310895557", "https://openalex.org/W6638018090", "https://openalex.org/W6676896484", "https://openalex.org/W6682367392", "https://openalex.org/W6687063787", "https://openalex.org/W6737893269", "https://openalex.org/W6748239807", "https://openalex.org/W6749304979", "https://openalex.org/W6751535212", "https://openalex.org/W6758846586", "https://openalex.org/W6764658791", "https://openalex.org/W6784856472", "https://openalex.org/W6788898170", "https://openalex.org/W6793975612", "https://openalex.org/W6794308622", "https://openalex.org/W6797795308", "https://openalex.org/W6800004206", "https://openalex.org/W6800526921", "https://openalex.org/W6801514172", "https://openalex.org/W6802610412", "https://openalex.org/W6843754643"], "related_works": ["https://openalex.org/W2126019709", "https://openalex.org/W1702901972", "https://openalex.org/W4249798507", "https://openalex.org/W2069775250", "https://openalex.org/W2093541819", "https://openalex.org/W2032294417", "https://openalex.org/W2152754392", "https://openalex.org/W1989172970", "https://openalex.org/W2196316523", "https://openalex.org/W1899363654"], "abstract_inverted_index": {"A": [0], "challenging": [1], "problem": [2, 87], "in": [3, 14, 39, 52, 107, 193], "robotics": [4], "is": [5], "how": [6], "to": [7, 58, 76, 97, 120, 165], "control": [8, 20, 48, 173], "multiple": [9], "robots": [10], "cooperatively": [11], "and": [12, 93, 109, 137, 158, 171, 199, 208], "safely": [13], "real-world": [15], "applications.": [16], "Yet,": [17], "developing": [18], "multi-robot": [19, 47], "methods": [21, 188], "from": [22], "the": [23, 84, 121, 143, 167, 180, 194, 213], "perspective": [24], "of": [25, 74, 111, 169], "safe": [26, 44, 78, 85, 127, 146, 182], "multi-agent": [27, 128], "reinforcement": [28], "learning": [29], "(MARL)": [30], "has": [31, 57], "merely": [32], "been": [33], "studied.": [34], "To": [35], "fill": [36], "this": [37, 40], "gap,": [38], "study,": [41], "we": [42, 82, 124, 141], "investigate": [43], "MARL": [45, 86, 147, 170, 183], "for": [46], "on": [49, 179], "cooperative": [50], "tasks,": [51], "which": [53], "each": [54], "individual": [55], "robot": [56, 172], "not": [59], "only": [60], "meet": [61], "its": [62], "own": [63], "safety": [64, 112, 201], "constraints": [65, 113, 202], "while": [66], "maximising": [67], "their": [68], "reward,": [69], "but": [70], "also": [71], "consider": [72], "those": [73], "others": [75], "guarantee": [77], "team": [79], "behaviours.": [80], "Firstly,": [81], "formulate": [83], "as": [88, 118], "a": [89], "constrained": [90], "Markov": [91], "game": [92], "employ": [94], "policy": [95, 129], "optimisation": [96], "solve": [98], "it": [99], "theoretically.": [100], "The": [101], "proposed": [102], "algorithm": [103], "guarantees": [104], "monotonic": [105], "improvement": [106], "reward": [108, 198], "satisfaction": [110], "at": [114, 212], "every": [115], "iteration.": [116], "Secondly,": [117], "approximations": [119], "theoretical": [122], "solution,": [123], "propose": [125], "two": [126], "gradient": [130], "methods:": [131], "Multi-Agent": [132, 149, 154, 160], "Constrained": [133], "Policy": [134], "Optimisation": [135], "(MACPO)": [136], "MAPPO-Lagrangian": [138], ".": [139], "Thirdly,": [140], "develop": [142], "first": [144], "three": [145, 181], "benchmarks\u2014Safe": [148], "MuJoCo": [150], "(Safe": [151, 156, 163], "MAMuJoCo),": [152], "Safe": [153, 159], "Robosuite": [155], "MARobosuite)": [157], "Isaac": [161], "Gym": [162], "MAIG)": [164], "expand": [166], "toolkit": [168], "research": [174], "communities.": [175], "Finally,": [176], "experimental": [177], "results": [178], "benchmarks": [184], "indicate": [185], "that": [186], "our": [187], "can": [189], "achieve": [190], "state-of-the-art": [191], "performance": [192], "balance": [195], "between": [196], "improving": [197], "satisfying": [200], "compared": [203], "with": [204], "strong": [205], "baselines.": [206], "Demos": [207], "code": [209], "are": [210], "available": [211], "link": [214], "(": [215], "https://sites.google.com/view/aij-safe-marl/": [216], ").": [217], "2": [218]}, "counts_by_year": [{"year": 2026, "cited_by_count": 20}, {"year": 2025, "cited_by_count": 59}, {"year": 2024, "cited_by_count": 39}, {"year": 2023, "cited_by_count": 3}], "updated_date": "2026-07-02T09:51:11.867554", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2262174858", "doi": null, "title": "Apprenticeship learning and reinforcement learning with application to robotic control", "display_name": "Apprenticeship learning and reinforcement learning with application to robotic control", "relevance_score": 265.81558, "publication_year": 2008, "publication_date": "2008-01-01", "ids": {"openalex": "https://openalex.org/W2262174858", "mag": "2262174858"}, "language": "en", "primary_location": {"id": "pmh:oai:pqdtoai.proquest.com:3332983", "is_oa": false, "landing_page_url": "http://pqdtopen.proquest.com/#viewpdf?dispub=3332983", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "thesis"}, "type": "article", "indexed_in": [], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5049349154", "display_name": "Pieter Abbeel", "orcid": null}, "institutions": [{"id": "https://openalex.org/I97018004", "display_name": "Stanford University", "ror": "https://ror.org/00f54p054", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I97018004"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "Pieter Abbeel", "raw_affiliation_strings": ["Stanford University ()"], "affiliations": [{"raw_affiliation_string": "Stanford University ()", "institution_ids": ["https://openalex.org/I97018004"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5049349154"], "corresponding_institution_ids": ["https://openalex.org/I97018004"], "apc_list": null, "apc_paid": null, "fwci": 8.5808, "has_fulltext": false, "cited_by_count": 72, "citation_normalized_percentile": {"value": 0.97518362, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9905999898910522, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9886000156402588, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7923595905303955}, {"id": "https://openalex.org/keywords/traverse", "display_name": "Traverse", "score": 0.6694361567497253}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.668908953666687}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6501655578613281}, {"id": "https://openalex.org/keywords/task", "display_name": "Task (project management)", "score": 0.6449666023254395}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6349065899848938}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5135860443115234}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.4686512351036072}, {"id": "https://openalex.org/keywords/apprenticeship", "display_name": "Apprenticeship", "score": 0.4479144811630249}, {"id": "https://openalex.org/keywords/unobservable", "display_name": "Unobservable", "score": 0.44443386793136597}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.43831461668014526}, {"id": "https://openalex.org/keywords/terrain", "display_name": "Terrain", "score": 0.42798030376434326}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.196092426776886}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.10514238476753235}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7923595905303955}, {"id": "https://openalex.org/C176809094", "wikidata": "https://www.wikidata.org/wiki/Q15401496", "display_name": "Traverse", "level": 2, "score": 0.6694361567497253}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.668908953666687}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6501655578613281}, {"id": "https://openalex.org/C2780451532", "wikidata": "https://www.wikidata.org/wiki/Q759676", "display_name": "Task (project management)", "level": 2, "score": 0.6449666023254395}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6349065899848938}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5135860443115234}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.4686512351036072}, {"id": "https://openalex.org/C107806365", "wikidata": "https://www.wikidata.org/wiki/Q253567", "display_name": "Apprenticeship", "level": 2, "score": 0.4479144811630249}, {"id": "https://openalex.org/C2780695315", "wikidata": "https://www.wikidata.org/wiki/Q3799040", "display_name": "Unobservable", "level": 2, "score": 0.44443386793136597}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.43831461668014526}, {"id": "https://openalex.org/C161840515", "wikidata": "https://www.wikidata.org/wiki/Q186131", "display_name": "Terrain", "level": 2, "score": 0.42798030376434326}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.196092426776886}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.10514238476753235}, {"id": "https://openalex.org/C13280743", "wikidata": "https://www.wikidata.org/wiki/Q131089", "display_name": "Geodesy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C205649164", "wikidata": "https://www.wikidata.org/wiki/Q1071", "display_name": "Geography", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C18903297", "wikidata": "https://www.wikidata.org/wiki/Q7150", "display_name": "Ecology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C201995342", "wikidata": "https://www.wikidata.org/wiki/Q682496", "display_name": "Systems engineering", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C149782125", "wikidata": "https://www.wikidata.org/wiki/Q160039", "display_name": "Econometrics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C41895202", "wikidata": "https://www.wikidata.org/wiki/Q8162", "display_name": "Linguistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "pmh:oai:pqdtoai.proquest.com:3332983", "is_oa": false, "landing_page_url": "http://pqdtopen.proquest.com/#viewpdf?dispub=3332983", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "thesis"}, {"id": "mag:2262174858", "is_oa": false, "landing_page_url": "https://dl.acm.org/citation.cfm?id=1571528", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 77, "referenced_works": ["https://openalex.org/W177093312", "https://openalex.org/W246617311", "https://openalex.org/W304861154", "https://openalex.org/W658381347", "https://openalex.org/W1495343868", "https://openalex.org/W1499408472", "https://openalex.org/W1511887321", "https://openalex.org/W1522531528", "https://openalex.org/W1537659559", "https://openalex.org/W1540685400", "https://openalex.org/W1542596583", "https://openalex.org/W1544444076", "https://openalex.org/W1554015367", "https://openalex.org/W1591675293", "https://openalex.org/W1594216983", "https://openalex.org/W1596805757", "https://openalex.org/W1597173708", "https://openalex.org/W1600813180", "https://openalex.org/W1602844557", "https://openalex.org/W1747856733", "https://openalex.org/W1777239053", "https://openalex.org/W1819208595", "https://openalex.org/W1821149804", "https://openalex.org/W1892385946", "https://openalex.org/W1949804828", "https://openalex.org/W1964535365", "https://openalex.org/W1965324089", "https://openalex.org/W1999874108", "https://openalex.org/W2012302913", "https://openalex.org/W2037539475", "https://openalex.org/W2044828368", "https://openalex.org/W2049633694", "https://openalex.org/W2054807159", "https://openalex.org/W2055207897", "https://openalex.org/W2056099894", "https://openalex.org/W2061562262", "https://openalex.org/W2064527819", "https://openalex.org/W2074231493", "https://openalex.org/W2096772472", "https://openalex.org/W2098432798", "https://openalex.org/W2099111195", "https://openalex.org/W2102222436", "https://openalex.org/W2103608534", "https://openalex.org/W2105934661", "https://openalex.org/W2105947986", "https://openalex.org/W2107726111", "https://openalex.org/W2111999018", "https://openalex.org/W2113023245", "https://openalex.org/W2116108870", "https://openalex.org/W2116442740", "https://openalex.org/W2117102490", "https://openalex.org/W2120591602", "https://openalex.org/W2121517924", "https://openalex.org/W2121863487", "https://openalex.org/W2125838338", "https://openalex.org/W2126105931", "https://openalex.org/W2128160875", "https://openalex.org/W2130105540", "https://openalex.org/W2139053308", "https://openalex.org/W2139302369", "https://openalex.org/W2142828048", "https://openalex.org/W2144587497", "https://openalex.org/W2148603752", "https://openalex.org/W2155357014", "https://openalex.org/W2158598687", "https://openalex.org/W2159080219", "https://openalex.org/W2161521419", "https://openalex.org/W2163614729", "https://openalex.org/W2167224731", "https://openalex.org/W2168359464", "https://openalex.org/W2169071224", "https://openalex.org/W2169080882", "https://openalex.org/W2169498096", "https://openalex.org/W2184803328", "https://openalex.org/W2489939061", "https://openalex.org/W2567948266", "https://openalex.org/W3195133498"], "related_works": ["https://openalex.org/W2121863487", "https://openalex.org/W2061562262", "https://openalex.org/W1986014385", "https://openalex.org/W1999874108", "https://openalex.org/W2169209873", "https://openalex.org/W1515851193", "https://openalex.org/W2087269417", "https://openalex.org/W2162991084", "https://openalex.org/W2145339207", "https://openalex.org/W1503697431", "https://openalex.org/W1756061918", "https://openalex.org/W2150930292", "https://openalex.org/W2119567691", "https://openalex.org/W1969160376", "https://openalex.org/W1777239053", "https://openalex.org/W2114882146", "https://openalex.org/W2890803796", "https://openalex.org/W3134537774", "https://openalex.org/W2964227158", "https://openalex.org/W1591675293"], "abstract_inverted_index": {"Many": [0], "problems": [1, 34, 119], "in": [2, 32, 44, 89, 124, 161, 209], "robotics": [3], "have": [4, 179, 192, 204], "unknown,": [5], "stochastic,": [6, 165], "high-dimensional,": [7], "and": [8, 12, 21, 80, 160, 167, 239], "highly": [9], "nonlinear": [10], "dynamics,": [11], "offer": [13], "significant": [14], "challenges": [15, 83], "to": [16, 41, 69, 85, 98, 142, 150, 183, 197, 229, 255], "both": [17, 77], "traditional": [18], "control": [19, 52, 145, 189], "methods": [20], "reinforcement": [22, 90], "learning": [23, 109, 127], "algorithms.": [24], "Some": [25], "of": [26, 50, 76, 133, 257], "the": [27, 51, 58, 86, 125, 134, 151, 157, 162, 171, 207, 219, 258], "key": [28], "difficulties": [29], "that": [30, 117, 256], "arise": [31], "these": [33, 118], "are:": [35], "(i)": [36], "It": [37, 65, 93], "is": [38, 57, 66, 94, 253], "often": [39, 67, 95], "difficult": [40, 68], "write": [42], "down,": [43], "closed": [45], "form,": [46], "a": [47, 71, 144, 194], "formal": [48, 112], "specification": [49], "task.": [53], "For": [54], "example,": [55], "what": [56], "objective": [59], "function": [60], "for": [61, 102], "\"flying": [62], "well\"?": [63], "(ii)": [64], "build": [70], "good": [72], "dynamics": [73], "model": [74], "because": [75], "data": [78, 81], "collection": [79], "modeling": [82], "(similar": [84], "\"exploration": [87], "problem\"": [88], "learning).": [91], "(iii)": [92], "computationally": [96], "expensive": [97], "find": [99], "closed-loop": [100], "controllers": [101], "high": [103], "dimensional,": [104], "stochastic": [105], "domains.": [106], "We": [107, 153], "describe": [108], "algorithms": [110, 139, 178], "with": [111, 147], "performance": [113, 148, 155, 252], "guarantees": [114], "which": [115, 241], "show": [116], "can": [120, 247], "be": [121], "efficiently": [122], "addressed": [123], "apprenticeship": [126], "setting\u2014the": [128], "setting": [129], "when": [130], "expert": [131, 244], "demonstrations": [132], "task": [135, 159], "are": [136, 140], "available.": [137], "Our": [138, 213, 249], "guaranteed": [141], "return": [143], "policy": [146], "comparable": [149, 254], "expert's.": [152], "evaluate": [154], "on": [156], "same": [158, 163], "(typically": [164], "high-dimensional": [166], "non-linear)": [168], "environment": [169], "as": [170, 234], "expert.": [172], "Besides": [173], "having": [174], "theoretical": [175], "guarantees,": [176], "our": [177], "also": [180], "enabled": [181, 193], "us": [182], "solve": [184], "some": [185], "previously": [186, 200], "unsolved": [187], "real-world": [188], "problems:": [190], "They": [191, 203], "quadruped": [195], "robot": [196], "traverse": [198], "challenging,": [199], "unseen": [201], "terrain.": [202], "significantly": [205], "extended": [206], "state-of-the-art": [208], "autonomous": [210, 227], "helicopter": [211, 214, 228], "flight.": [212], "has": [215], "performed": [216, 224], "by": [217, 225], "far": [218], "most": [220], "challenging": [221], "aerobatic": [222, 250], "maneuvers": [223, 232], "any": [226], "date,": [230], "including": [231], "such": [233], "continuous": [235], "in-place": [236], "flips,": [237], "rolls": [238], "tic-tocs,": [240], "only": [242], "exceptional": [243], "human": [245, 260], "pilots": [246], "fly.": [248], "flight": [251], "best": [259], "pilots.": [261]}, "counts_by_year": [{"year": 2024, "cited_by_count": 1}, {"year": 2023, "cited_by_count": 1}, {"year": 2022, "cited_by_count": 1}, {"year": 2021, "cited_by_count": 3}, {"year": 2020, "cited_by_count": 7}, {"year": 2019, "cited_by_count": 8}, {"year": 2018, "cited_by_count": 5}, {"year": 2017, "cited_by_count": 3}, {"year": 2016, "cited_by_count": 7}, {"year": 2015, "cited_by_count": 6}, {"year": 2014, "cited_by_count": 3}, {"year": 2013, "cited_by_count": 4}, {"year": 2012, "cited_by_count": 8}], "updated_date": "2025-11-06T04:12:42.849631", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2290354866", "doi": "https://doi.org/10.48550/arxiv.1603.00748", "title": "Continuous Deep Q-Learning with Model-based Acceleration", "display_name": "Continuous Deep Q-Learning with Model-based Acceleration", "relevance_score": 265.80838, "publication_year": 2016, "publication_date": "2016-03-02", "ids": {"openalex": "https://openalex.org/W2290354866", "doi": "https://doi.org/10.48550/arxiv.1603.00748", "mag": "2290354866"}, "language": "en", "primary_location": {"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "type": "article", "indexed_in": ["arxiv", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1603.00748", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5061613634", "display_name": "Shixiang Gu", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210135521", "display_name": "Max Planck Institute for Intelligent Systems", "ror": "https://ror.org/04fq9j139", "country_code": "DE", "type": "facility", "lineage": ["https://openalex.org/I149899117", "https://openalex.org/I4210135521"]}], "countries": ["DE", "US"], "is_corresponding": false, "raw_author_name": "Gu, Shixiang", "raw_affiliation_strings": ["University of Cambridge and Max Planck Institute for Intelligent Systems and Google Brain#TAB#"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Cambridge and Max Planck Institute for Intelligent Systems and Google Brain#TAB#", "institution_ids": ["https://openalex.org/I4210135521", "https://openalex.org/I1291425158"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5066294254", "display_name": "Timothy Lillicrap", "orcid": "https://orcid.org/0000-0001-8918-486X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Lillicrap, Timothy", "raw_affiliation_strings": ["Google DeepMind;"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind;", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5006446297", "display_name": "Ilya Sutskever", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Sutskever, Ilya", "raw_affiliation_strings": ["[Google Brain]"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "[Google Brain]", "institution_ids": ["https://openalex.org/I1291425158"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Levine, Sergey", "raw_affiliation_strings": ["[Google Brain]"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "[Google Brain]", "institution_ids": ["https://openalex.org/I1291425158"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 3, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": true, "cited_by_count": 337, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": null, "issue": null, "first_page": "2829", "last_page": "2838"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9876000285148621, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11689", "display_name": "Adversarial Robustness in Machine Learning", "score": 0.9819999933242798, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.9228410124778748}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7602490186691284}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5720012187957764}, {"id": "https://openalex.org/keywords/limit", "display_name": "Limit (mathematics)", "score": 0.5509039759635925}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.536858081817627}, {"id": "https://openalex.org/keywords/acceleration", "display_name": "Acceleration", "score": 0.5264359712600708}, {"id": "https://openalex.org/keywords/range", "display_name": "Range (aeronautics)", "score": 0.5211313366889954}, {"id": "https://openalex.org/keywords/temporal-difference-learning", "display_name": "Temporal difference learning", "score": 0.513974130153656}, {"id": "https://openalex.org/keywords/set", "display_name": "Set (abstract data type)", "score": 0.493217408657074}, {"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.47142165899276733}, {"id": "https://openalex.org/keywords/function", "display_name": "Function (biology)", "score": 0.46152347326278687}, {"id": "https://openalex.org/keywords/q-learning", "display_name": "Q-learning", "score": 0.4430024027824402}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.4366663694381714}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4084426760673523}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.12573018670082092}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.9228410124778748}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7602490186691284}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5720012187957764}, {"id": "https://openalex.org/C151201525", "wikidata": "https://www.wikidata.org/wiki/Q177239", "display_name": "Limit (mathematics)", "level": 2, "score": 0.5509039759635925}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.536858081817627}, {"id": "https://openalex.org/C117896860", "wikidata": "https://www.wikidata.org/wiki/Q11376", "display_name": "Acceleration", "level": 2, "score": 0.5264359712600708}, {"id": "https://openalex.org/C204323151", "wikidata": "https://www.wikidata.org/wiki/Q905424", "display_name": "Range (aeronautics)", "level": 2, "score": 0.5211313366889954}, {"id": "https://openalex.org/C196340769", "wikidata": "https://www.wikidata.org/wiki/Q7698910", "display_name": "Temporal difference learning", "level": 3, "score": 0.513974130153656}, {"id": "https://openalex.org/C177264268", "wikidata": "https://www.wikidata.org/wiki/Q1514741", "display_name": "Set (abstract data type)", "level": 2, "score": 0.493217408657074}, {"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.47142165899276733}, {"id": "https://openalex.org/C14036430", "wikidata": "https://www.wikidata.org/wiki/Q3736076", "display_name": "Function (biology)", "level": 2, "score": 0.46152347326278687}, {"id": "https://openalex.org/C188116033", "wikidata": "https://www.wikidata.org/wiki/Q2664563", "display_name": "Q-learning", "level": 3, "score": 0.4430024027824402}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.4366663694381714}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4084426760673523}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.12573018670082092}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C74650414", "wikidata": "https://www.wikidata.org/wiki/Q11397", "display_name": "Classical mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C78458016", "wikidata": "https://www.wikidata.org/wiki/Q840400", "display_name": "Evolutionary biology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C159985019", "wikidata": "https://www.wikidata.org/wiki/Q181790", "display_name": "Composite material", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199360897", "wikidata": "https://www.wikidata.org/wiki/Q9143", "display_name": "Programming language", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C192562407", "wikidata": "https://www.wikidata.org/wiki/Q228736", "display_name": "Materials science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 5, "locations": [{"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "https://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, {"id": "pmh:oai:www.repository.cam.ac.uk:1810/255896", "is_oa": true, "landing_page_url": "https://www.repository.cam.ac.uk/handle/1810/255896", "pdf_url": "https://www.repository.cam.ac.uk/handle/1810/255896", "source": {"id": "https://openalex.org/S4306401776", "display_name": "Apollo (University of Cambridge)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I241749", "host_organization_name": "University of Cambridge", "host_organization_lineage": ["https://openalex.org/I241749"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "published version"}, {"id": "doi:10.48550/arxiv.1603.00748", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1603.00748", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "Preprint"}, {"id": "mag:2290354866", "is_oa": false, "landing_page_url": null, "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320317153", "display_name": "DeepMind", "ror": "https://ror.org/00971b260"}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 39, "referenced_works": ["https://openalex.org/W195033972", "https://openalex.org/W1491843047", "https://openalex.org/W1499669280", "https://openalex.org/W1597173708", "https://openalex.org/W1599347336", "https://openalex.org/W1771410628", "https://openalex.org/W1825869920", "https://openalex.org/W2012587148", "https://openalex.org/W2038794597", "https://openalex.org/W2087617385", "https://openalex.org/W2104733512", "https://openalex.org/W2115121720", "https://openalex.org/W2121103318", "https://openalex.org/W2127107099", "https://openalex.org/W2140135625", "https://openalex.org/W2143072483", "https://openalex.org/W2145339207", "https://openalex.org/W2154208977", "https://openalex.org/W2155027007", "https://openalex.org/W2156737235", "https://openalex.org/W2158782408", "https://openalex.org/W2165150801", "https://openalex.org/W2173564293", "https://openalex.org/W2296360731", "https://openalex.org/W2566089760", "https://openalex.org/W2949608212", "https://openalex.org/W2962986780", "https://openalex.org/W2963184621", "https://openalex.org/W2963280855", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2963616477", "https://openalex.org/W2963864421", "https://openalex.org/W2964006217", "https://openalex.org/W2964121744", "https://openalex.org/W2964161785", "https://openalex.org/W2965916140", "https://openalex.org/W3148194443", "https://openalex.org/W3148685027"], "related_works": ["https://openalex.org/W2964161785", "https://openalex.org/W2964121744", "https://openalex.org/W2964043796", "https://openalex.org/W2962872206", "https://openalex.org/W2736601468", "https://openalex.org/W2257979135", "https://openalex.org/W2173248099", "https://openalex.org/W2165150801", "https://openalex.org/W2158782408", "https://openalex.org/W2155968351", "https://openalex.org/W2155027007", "https://openalex.org/W2145339207", "https://openalex.org/W2140135625", "https://openalex.org/W2121863487", "https://openalex.org/W1771410628", "https://openalex.org/W1757796397", "https://openalex.org/W1491843047", "https://openalex.org/W2119717200", "https://openalex.org/W2173564293", "https://openalex.org/W2964006217"], "abstract_inverted_index": {"Model-free": [0], "reinforcement": [1, 63, 154], "learning": [2, 64, 173], "has": [3, 14], "been": [4, 16], "successfully": [5], "applied": [6], "to": [7, 18, 41, 45, 56, 101, 115, 121], "a": [8, 84, 129], "range": [9], "of": [10, 31, 61, 78, 87, 131, 141, 148], "challenging": [11], "problems,": [12], "and": [13, 24, 54, 108, 124, 169], "recently": [15], "extended": [17], "handle": [19], "large": [20], "neural": [21], "network": [22], "policies": [23], "value": [25], "functions.": [26], "However,": [27], "the": [28, 58, 76, 88, 102, 139, 146], "sample": [29, 59], "complexity": [30, 60], "model-free": [32, 153], "algorithms,": [33], "particularly": [34], "when": [35], "using": [36], "high-dimensional": [37], "function": [38], "approximators,": [39], "tends": [40], "limit": [42], "their": [43], "applicability": [44], "physical": [46], "systems.": [47], "In": [48], "this": [49], "paper,": [50], "we": [51, 82, 92, 144], "explore": [52, 145], "algorithms": [53], "representations": [55], "reduce": [57], "deep": [62], "for": [65, 74, 151, 167], "continuous": [66, 85, 122], "control": [67, 134], "tasks.": [68, 135], "We": [69, 156], "propose": [70], "two": [71], "complementary": [72], "techniques": [73], "improving": [75], "efficiency": [77, 140], "such": [79, 177], "algorithms.": [80], "First,": [81], "derive": [83], "variant": [86], "Q-learning": [89, 117], "algorithm,": [90], "which": [91], "call": [93], "normalized": [94], "adantage": [95], "functions": [96], "(NAF),": [97], "as": [98], "an": [99], "alternative": [100], "more": [103], "commonly": [104], "used": [105], "policy": [106], "gradient": [107], "actor-critic": [109], "methods.": [110], "NAF": [111], "representation": [112], "allows": [113], "us": [114], "apply": [116], "with": [118], "experience": [119], "replay": [120], "tasks,": [123], "substantially": [125, 171], "improves": [126], "performance": [127], "on": [128, 174], "set": [130], "simulated": [132], "robotic": [133], "To": [136], "further": [137], "improve": [138], "our": [142], "approach,": [143], "use": [147], "learned": [149], "models": [150, 163, 178], "accelerating": [152], "learning.": [155], "show": [157], "that": [158], "iteratively": [159], "refitted": [160], "local": [161], "linear": [162], "are": [164, 179], "especially": [165], "effective": [166], "this,": [168], "demonstrate": [170], "faster": [172], "domains": [175], "where": [176], "applicable.": [180]}, "counts_by_year": [{"year": 2024, "cited_by_count": 5}, {"year": 2023, "cited_by_count": 4}, {"year": 2022, "cited_by_count": 12}, {"year": 2021, "cited_by_count": 63}, {"year": 2020, "cited_by_count": 83}, {"year": 2019, "cited_by_count": 88}, {"year": 2018, "cited_by_count": 55}, {"year": 2017, "cited_by_count": 22}, {"year": 2016, "cited_by_count": 5}], "updated_date": "2026-07-03T08:13:44.112507", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W1825869920", "doi": "https://doi.org/10.1007/s10994-011-5235-x", "title": "Reinforcement learning in feedback control", "display_name": "Reinforcement learning in feedback control", "relevance_score": 260.35193, "publication_year": 2011, "publication_date": "2011-02-26", "ids": {"openalex": "https://openalex.org/W1825869920", "doi": "https://doi.org/10.1007/s10994-011-5235-x", "mag": "1825869920"}, "language": "en", "primary_location": {"id": "doi:10.1007/s10994-011-5235-x", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10994-011-5235-x", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10994-011-5235-x.pdf", "source": {"id": "https://openalex.org/S62148650", "display_name": "Machine Learning", "issn_l": "0885-6125", "issn": ["0885-6125", "1573-0565"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Machine Learning", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "bronze", "oa_url": "https://link.springer.com/content/pdf/10.1007/s10994-011-5235-x.pdf", "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5018196238", "display_name": "Roland Hafner", "orcid": "https://orcid.org/0000-0001-8061-8828"}, "institutions": [{"id": "https://openalex.org/I161046081", "display_name": "University of Freiburg", "ror": "https://ror.org/0245cg223", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I161046081"]}], "countries": ["DE"], "is_corresponding": true, "raw_author_name": "Roland Hafner", "raw_affiliation_strings": ["Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany", "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany#TAB#"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany", "institution_ids": ["https://openalex.org/I161046081"]}, {"raw_affiliation_string": "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany#TAB#", "institution_ids": ["https://openalex.org/I161046081"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5041323275", "display_name": "Martin Riedmiller", "orcid": "https://orcid.org/0000-0002-8465-5690"}, "institutions": [{"id": "https://openalex.org/I161046081", "display_name": "University of Freiburg", "ror": "https://ror.org/0245cg223", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I161046081"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Martin Riedmiller", "raw_affiliation_strings": ["Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany", "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany#TAB#"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany", "institution_ids": ["https://openalex.org/I161046081"]}, {"raw_affiliation_string": "Machine Learning Lab, Albert-Ludwigs University Freiburg, Freiburg im Breisgau, Germany#TAB#", "institution_ids": ["https://openalex.org/I161046081"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5018196238"], "corresponding_institution_ids": ["https://openalex.org/I161046081"], "apc_list": {"value": 2390, "currency": "EUR", "value_usd": 2990}, "apc_paid": null, "fwci": 13.1439, "has_fulltext": true, "cited_by_count": 218, "citation_normalized_percentile": {"value": 0.98603146, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 98, "max": 100}, "biblio": {"volume": "84", "issue": "1-2", "first_page": "137", "last_page": "169"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T11749", "display_name": "Iterative Learning Control Systems", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T11749", "display_name": "Iterative Learning Control Systems", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9968000054359436, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10791", "display_name": "Advanced Control Systems Optimization", "score": 0.9944000244140625, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8585594296455383}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7971539497375488}, {"id": "https://openalex.org/keywords/benchmark", "display_name": "Benchmark (surveying)", "score": 0.7489572167396545}, {"id": "https://openalex.org/keywords/controller", "display_name": "Controller (irrigation)", "score": 0.6161996722221375}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.5830215811729431}, {"id": "https://openalex.org/keywords/stability", "display_name": "Stability (learning theory)", "score": 0.5635536313056946}, {"id": "https://openalex.org/keywords/set", "display_name": "Set (abstract data type)", "score": 0.5174022912979126}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49439549446105957}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4898836016654968}, {"id": "https://openalex.org/keywords/key", "display_name": "Key (lock)", "score": 0.4669206142425537}, {"id": "https://openalex.org/keywords/quality", "display_name": "Quality (philosophy)", "score": 0.4602445960044861}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4358670711517334}, {"id": "https://openalex.org/keywords/perspective", "display_name": "Perspective (graphical)", "score": 0.4153425097465515}, {"id": "https://openalex.org/keywords/scheme", "display_name": "Scheme (mathematics)", "score": 0.4110400080680847}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.3307439088821411}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.11665371060371399}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.07957309484481812}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8585594296455383}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7971539497375488}, {"id": "https://openalex.org/C185798385", "wikidata": "https://www.wikidata.org/wiki/Q1161707", "display_name": "Benchmark (surveying)", "level": 2, "score": 0.7489572167396545}, {"id": "https://openalex.org/C203479927", "wikidata": "https://www.wikidata.org/wiki/Q5165939", "display_name": "Controller (irrigation)", "level": 2, "score": 0.6161996722221375}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.5830215811729431}, {"id": "https://openalex.org/C112972136", "wikidata": "https://www.wikidata.org/wiki/Q7595718", "display_name": "Stability (learning theory)", "level": 2, "score": 0.5635536313056946}, {"id": "https://openalex.org/C177264268", "wikidata": "https://www.wikidata.org/wiki/Q1514741", "display_name": "Set (abstract data type)", "level": 2, "score": 0.5174022912979126}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49439549446105957}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4898836016654968}, {"id": "https://openalex.org/C26517878", "wikidata": "https://www.wikidata.org/wiki/Q228039", "display_name": "Key (lock)", "level": 2, "score": 0.4669206142425537}, {"id": "https://openalex.org/C2779530757", "wikidata": "https://www.wikidata.org/wiki/Q1207505", "display_name": "Quality (philosophy)", "level": 2, "score": 0.4602445960044861}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4358670711517334}, {"id": "https://openalex.org/C12713177", "wikidata": "https://www.wikidata.org/wiki/Q1900281", "display_name": "Perspective (graphical)", "level": 2, "score": 0.4153425097465515}, {"id": "https://openalex.org/C77618280", "wikidata": "https://www.wikidata.org/wiki/Q1155772", "display_name": "Scheme (mathematics)", "level": 2, "score": 0.4110400080680847}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.3307439088821411}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.11665371060371399}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.07957309484481812}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C38652104", "wikidata": "https://www.wikidata.org/wiki/Q3510521", "display_name": "Computer security", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C205649164", "wikidata": "https://www.wikidata.org/wiki/Q1071", "display_name": "Geography", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C13280743", "wikidata": "https://www.wikidata.org/wiki/Q131089", "display_name": "Geodesy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C111472728", "wikidata": "https://www.wikidata.org/wiki/Q9471", "display_name": "Epistemology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C199360897", "wikidata": "https://www.wikidata.org/wiki/Q9143", "display_name": "Programming language", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C6557445", "wikidata": "https://www.wikidata.org/wiki/Q173113", "display_name": "Agronomy", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/s10994-011-5235-x", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10994-011-5235-x", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10994-011-5235-x.pdf", "source": {"id": "https://openalex.org/S62148650", "display_name": "Machine Learning", "issn_l": "0885-6125", "issn": ["0885-6125", "1573-0565"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Machine Learning", "raw_type": "journal-article"}], "best_oa_location": {"id": "doi:10.1007/s10994-011-5235-x", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10994-011-5235-x", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10994-011-5235-x.pdf", "source": {"id": "https://openalex.org/S62148650", "display_name": "Machine Learning", "issn_l": "0885-6125", "issn": ["0885-6125", "1573-0565"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Machine Learning", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W1825869920.pdf", "grobid_xml": "https://content.openalex.org/works/W1825869920.grobid-xml"}, "referenced_works_count": 59, "referenced_works": ["https://openalex.org/W166862392", "https://openalex.org/W178056938", "https://openalex.org/W190583841", "https://openalex.org/W593265459", "https://openalex.org/W967057166", "https://openalex.org/W1487127700", "https://openalex.org/W1508997545", "https://openalex.org/W1523738681", "https://openalex.org/W1557517019", "https://openalex.org/W1559107966", "https://openalex.org/W1603277681", "https://openalex.org/W1949804828", "https://openalex.org/W1965324089", "https://openalex.org/W1987356574", "https://openalex.org/W1994483619", "https://openalex.org/W2001426593", "https://openalex.org/W2040135606", "https://openalex.org/W2054496741", "https://openalex.org/W2059343892", "https://openalex.org/W2103048296", "https://openalex.org/W2103626435", "https://openalex.org/W2117341272", "https://openalex.org/W2118426468", "https://openalex.org/W2127107099", "https://openalex.org/W2127412976", "https://openalex.org/W2132737131", "https://openalex.org/W2134569556", "https://openalex.org/W2136064843", "https://openalex.org/W2137002893", "https://openalex.org/W2139721494", "https://openalex.org/W2142592520", "https://openalex.org/W2143908786", "https://openalex.org/W2146737184", "https://openalex.org/W2149078751", "https://openalex.org/W2154032554", "https://openalex.org/W2156666755", "https://openalex.org/W2165501837", "https://openalex.org/W2165924625", "https://openalex.org/W2168945912", "https://openalex.org/W2187600643", "https://openalex.org/W2271358270", "https://openalex.org/W2341171179", "https://openalex.org/W2346626577", "https://openalex.org/W2492791808", "https://openalex.org/W2546698832", "https://openalex.org/W2582998992", "https://openalex.org/W2618159560", "https://openalex.org/W2619153869", "https://openalex.org/W2974036678", "https://openalex.org/W3011120880", "https://openalex.org/W4238586295", "https://openalex.org/W4299507991", "https://openalex.org/W6601295022", "https://openalex.org/W6629040409", "https://openalex.org/W6678927950", "https://openalex.org/W6704298589", "https://openalex.org/W6729406737", "https://openalex.org/W6775686901", "https://openalex.org/W7025024117"], "related_works": ["https://openalex.org/W2378211422", "https://openalex.org/W4321353415", "https://openalex.org/W2130974462", "https://openalex.org/W972276598", "https://openalex.org/W2086519370", "https://openalex.org/W2028665553", "https://openalex.org/W2087343574", "https://openalex.org/W2535915176", "https://openalex.org/W2105860728", "https://openalex.org/W4287657826"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 4}, {"year": 2025, "cited_by_count": 8}, {"year": 2024, "cited_by_count": 13}, {"year": 2023, "cited_by_count": 22}, {"year": 2022, "cited_by_count": 24}, {"year": 2021, "cited_by_count": 21}, {"year": 2020, "cited_by_count": 23}, {"year": 2019, "cited_by_count": 27}, {"year": 2018, "cited_by_count": 20}, {"year": 2017, "cited_by_count": 12}, {"year": 2016, "cited_by_count": 13}, {"year": 2015, "cited_by_count": 11}, {"year": 2014, "cited_by_count": 9}, {"year": 2013, "cited_by_count": 6}, {"year": 2012, "cited_by_count": 5}], "updated_date": "2026-06-23T13:55:30.953635", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2788862220", "doi": "https://doi.org/10.1609/aaai.v32i1.11757", "title": "Deep Q-learning From Demonstrations", "display_name": "Deep Q-learning From Demonstrations", "relevance_score": 257.639, "publication_year": 2018, "publication_date": "2018-04-29", "ids": {"openalex": "https://openalex.org/W2788862220", "doi": "https://doi.org/10.1609/aaai.v32i1.11757", "mag": "2788862220"}, "language": "en", "primary_location": {"id": "doi:10.1609/aaai.v32i1.11757", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11757", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "diamond", "oa_url": "https://doi.org/10.1609/aaai.v32i1.11757", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5048229171", "display_name": "Todd Hester", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Todd Hester", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5039155450", "display_name": "Matej Vecer\u00edk", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Matej Vecerik", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5065100569", "display_name": "Olivier Pietquin", "orcid": "https://orcid.org/0000-0002-5386-465X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Olivier Pietquin", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5049659586", "display_name": "Marc Lanctot", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Marc Lanctot", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5081322018", "display_name": "Tom Schaul", "orcid": "https://orcid.org/0000-0002-2961-8782"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Tom Schaul", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5103033215", "display_name": "Bilal Piot", "orcid": "https://orcid.org/0000-0003-3906-950X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Bilal Piot", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5030338894", "display_name": "Dan Horgan", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Dan Horgan", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5018191427", "display_name": "John Quan", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "John Quan", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5028929445", "display_name": "Andrew Sendonaris", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Andrew Sendonaris", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5015899120", "display_name": "Ian Osband", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Ian Osband", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5008880429", "display_name": "Gabriel Dulac-Arnold", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Gabriel Dulac-Arnold", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5017056095", "display_name": "John Agapiou", "orcid": "https://orcid.org/0000-0003-2642-2845"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "John Agapiou", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5054808675", "display_name": "Joel Z. Leibo", "orcid": "https://orcid.org/0000-0002-3153-916X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Joel Leibo", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5040179074", "display_name": "Audr\u016bnas Gruslys", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "Google DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Audrunas Gruslys", "raw_affiliation_strings": ["Google DeepMind"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Google DeepMind", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 2, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 23.4073, "has_fulltext": true, "cited_by_count": 811, "citation_normalized_percentile": {"value": 0.99329124, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "32", "issue": "1", "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9901999831199646, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11674", "display_name": "Sports Analytics and Performance", "score": 0.9868999719619751, "subfield": {"id": "https://openalex.org/subfields/2002", "display_name": "Economics and Econometrics"}, "field": {"id": "https://openalex.org/fields/20", "display_name": "Economics, Econometrics and Finance"}, "domain": {"id": "https://openalex.org/domains/2", "display_name": "Social Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.8663511276245117}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8146408796310425}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.6455647349357605}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6373509168624878}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.5887935161590576}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.521086573600769}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4358724355697632}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.8663511276245117}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8146408796310425}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.6455647349357605}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6373509168624878}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.5887935161590576}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.521086573600769}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4358724355697632}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1609/aaai.v32i1.11757", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11757", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, {"id": "pmh:oai:ojs.aaai.org:article/11757", "is_oa": true, "landing_page_url": "https://ojs.aaai.org/index.php/AAAI/article/view/11757", "pdf_url": "https://ojs.aaai.org/index.php/AAAI/article/download/11757/11616", "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "2159-5399", "raw_type": "info:eu-repo/semantics/article"}], "best_oa_location": {"id": "doi:10.1609/aaai.v32i1.11757", "is_oa": true, "landing_page_url": "https://doi.org/10.1609/aaai.v32i1.11757", "pdf_url": null, "source": {"id": "https://openalex.org/S4210191458", "display_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "issn_l": "2159-5399", "issn": ["2159-5399", "2374-3468"], "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320058", "host_organization_name": "Association for the Advancement of Artificial Intelligence", "host_organization_lineage": ["https://openalex.org/P4310320058"], "host_organization_lineage_names": ["Association for the Advancement of Artificial Intelligence"], "type": "conference"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the AAAI Conference on Artificial Intelligence", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320317153", "display_name": "DeepMind", "ror": "https://ror.org/00971b260"}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 66, "referenced_works": ["https://openalex.org/W106792269", "https://openalex.org/W834081922", "https://openalex.org/W1515851193", "https://openalex.org/W1931877416", "https://openalex.org/W1999874108", "https://openalex.org/W2061562262", "https://openalex.org/W2102847492", "https://openalex.org/W2113023245", "https://openalex.org/W2133552775", "https://openalex.org/W2137375617", "https://openalex.org/W2138108551", "https://openalex.org/W2145339207", "https://openalex.org/W2148051740", "https://openalex.org/W2148112459", "https://openalex.org/W2155968351", "https://openalex.org/W2169209873", "https://openalex.org/W2173564293", "https://openalex.org/W2181849516", "https://openalex.org/W2201581102", "https://openalex.org/W2253157232", "https://openalex.org/W2257979135", "https://openalex.org/W2260756217", "https://openalex.org/W2290053245", "https://openalex.org/W2290104316", "https://openalex.org/W2396161314", "https://openalex.org/W2397581010", "https://openalex.org/W2415726935", "https://openalex.org/W2434014514", "https://openalex.org/W2481567506", "https://openalex.org/W2491675558", "https://openalex.org/W2507592741", "https://openalex.org/W2509374375", "https://openalex.org/W2551887912", "https://openalex.org/W2594640072", "https://openalex.org/W2596982695", "https://openalex.org/W2601322194", "https://openalex.org/W2607198029", "https://openalex.org/W2612610049", "https://openalex.org/W2746553466", "https://openalex.org/W2919115771", "https://openalex.org/W2950735232", "https://openalex.org/W2950872548", "https://openalex.org/W2951799221", "https://openalex.org/W2952509347", "https://openalex.org/W2962957031", "https://openalex.org/W2963094133", "https://openalex.org/W2963160877", "https://openalex.org/W2963211300", "https://openalex.org/W2963277051", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2964043796", "https://openalex.org/W2964161785", "https://openalex.org/W3103780890", "https://openalex.org/W4299563772", "https://openalex.org/W4312558117", "https://openalex.org/W6630221451", "https://openalex.org/W6631026904", "https://openalex.org/W6676728370", "https://openalex.org/W6679700999", "https://openalex.org/W6687681856", "https://openalex.org/W6691230391", "https://openalex.org/W6696265566", "https://openalex.org/W6718092244", "https://openalex.org/W6735944222", "https://openalex.org/W6760385162"], "related_works": ["https://openalex.org/W4362501864", "https://openalex.org/W4306904969", "https://openalex.org/W4380318855", "https://openalex.org/W2138720691", "https://openalex.org/W2031695474", "https://openalex.org/W2586732548", "https://openalex.org/W3049728571", "https://openalex.org/W20361778", "https://openalex.org/W2024136090", "https://openalex.org/W4380075502"], "abstract_inverted_index": {"Deep": [0, 91, 163], "reinforcement": [1], "learning": [2, 34, 107, 130], "(RL)": [3], "has": [4, 155], "achieved": [5], "several": [6], "high": [7], "profile": [8], "successes": [9], "in": [10, 64, 207], "difficult": [11], "decision-making": [12], "problems.": [13], "However,": [14], "these": [15], "algorithms": [16, 235], "typically": [17], "require": [18], "a": [19, 44, 73, 133], "huge": [20], "amount": [21], "of": [22, 52, 84, 100, 114, 126, 147, 180, 209], "data": [23, 80, 102, 116, 128, 239], "before": [24], "they": [25], "reach": [26], "reasonable": [27], "performance.": [28, 198], "In": [29, 68, 212], "fact,": [30], "their": [31], "performance": [32, 158], "during": [33], "can": [35], "be": [36, 41], "extremely": [37], "poor.": [38], "This": [39], "may": [40, 78], "acceptable": [42], "for": [43, 222, 236], "simulator,": [45], "but": [46], "it": [47, 168, 186], "severely": [48], "limits": [49], "the": [50, 60, 65, 76, 85, 106, 123, 148, 174, 203], "applicability": [51], "deep": [53], "RL": [54], "to": [55, 103, 120, 132, 193, 196, 201, 218], "many": [56], "real-world": [57], "tasks,": [58], "where": [59, 75], "agent": [61, 77], "must": [62], "learn": [63], "real": [66], "environment.": [67], "this": [69], "paper": [70], "we": [71, 226], "study": [72], "setting": [74], "access": [79], "from": [81, 93, 110], "previous": [82], "control": [83], "system.": [86], "We": [87, 151], "present": [88], "an": [89], "algorithm,": [90], "Q-learning": [92], "Demonstrations": [94], "(DQfD),": [95], "that": [96, 153, 228], "leverages": [97, 215], "small": [98, 112], "sets": [99], "demonstration": [101, 115, 127, 205, 238], "massively": [104], "accelerate": [105], "process": [108], "even": [109], "relatively": [111], "amounts": [113], "and": [117, 183], "is": [118], "able": [119], "automatically": [121], "assess": [122], "necessary": [124], "ratio": [125], "while": [129], "thanks": [131], "prioritized": [134], "replay": [135], "mechanism.": [136], "DQfD": [137, 154, 199, 214, 229], "works": [138], "by": [139], "combining": [140], "temporal": [141], "difference": [142], "updates": [143], "with": [144, 170], "supervised": [145], "classification": [146], "demonstrator\u2019s": [149], "actions.": [150], "show": [152, 227], "better": [156, 171, 231], "initial": [157], "than": [159, 232], "Prioritized": [160], "Dueling": [161], "Double": [162], "Q-Networks": [164], "(PDD": [165], "DQN)": [166], "as": [167], "starts": [169], "scores": [172], "on": [173, 178, 184], "first": [175], "million": [176, 191], "steps": [177, 192], "41": [179], "42": [181, 210], "games": [182], "average": [185], "takes": [187], "PDD": [188], "DQN": [189], "83": [190], "catch": [194], "up": [195], "DQfD\u2019s": [197], "learns": [200], "out-perform": [202], "best": [204], "given": [206], "14": [208], "games.": [211, 224], "addition,": [213], "human": [216], "demonstrations": [217], "achieve": [219], "state-of-the-art": [220], "results": [221], "11": [223], "Finally,": [225], "performs": [230], "three": [233], "related": [234], "incorporating": [237], "into": [240], "DQN.": [241]}, "counts_by_year": [{"year": 2026, "cited_by_count": 24}, {"year": 2025, "cited_by_count": 96}, {"year": 2024, "cited_by_count": 107}, {"year": 2023, "cited_by_count": 150}, {"year": 2022, "cited_by_count": 126}, {"year": 2021, "cited_by_count": 110}, {"year": 2020, "cited_by_count": 97}, {"year": 2019, "cited_by_count": 58}, {"year": 2018, "cited_by_count": 35}, {"year": 2017, "cited_by_count": 8}], "updated_date": "2026-07-02T09:51:11.867554", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W1989099984", "doi": "https://doi.org/10.1109/tsmcc.2012.2186565", "title": "Machine Learning Algorithms in Bipedal Robot Control", "display_name": "Machine Learning Algorithms in Bipedal Robot Control", "relevance_score": 254.60246, "publication_year": 2012, "publication_date": "2012-04-18", "ids": {"openalex": "https://openalex.org/W1989099984", "doi": "https://doi.org/10.1109/tsmcc.2012.2186565", "mag": "1989099984"}, "language": "en", "primary_location": {"id": "doi:10.1109/tsmcc.2012.2186565", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/tsmcc.2012.2186565", "pdf_url": null, "source": {"id": "https://openalex.org/S4210227557", "display_name": "IEEE Transactions on Systems Man and Cybernetics Part C (Applications and Reviews)", "issn_l": "1094-6977", "issn": ["1094-6977", "1558-2442"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5022486615", "display_name": "Shouyi Wang", "orcid": "https://orcid.org/0000-0002-9046-6474"}, "institutions": [{"id": "https://openalex.org/I102322142", "display_name": "Rutgers, The State University of New Jersey", "ror": "https://ror.org/05vt9qd57", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I102322142"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Shouyi Wang", "raw_affiliation_strings": ["Department of Industrial and Systems Engineering, Rutgers The State University of New Jersey, New Brunswick, NJ, USA", "Dept. of Ind. & Syst. Eng., State Univ. of New Jersey, New Brunswick, NJ, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Industrial and Systems Engineering, Rutgers The State University of New Jersey, New Brunswick, NJ, USA", "institution_ids": ["https://openalex.org/I102322142"]}, {"raw_affiliation_string": "Dept. of Ind. & Syst. Eng., State Univ. of New Jersey, New Brunswick, NJ, USA", "institution_ids": ["https://openalex.org/I102322142"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5015925945", "display_name": "W. Art Chaovalitwongse", "orcid": "https://orcid.org/0000-0002-8051-5981"}, "institutions": [{"id": "https://openalex.org/I201448701", "display_name": "University of Washington", "ror": "https://ror.org/00cvxb145", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I201448701"]}, {"id": "https://openalex.org/I2801852214", "display_name": "University of Washington Medical Center", "ror": "https://ror.org/00wbzw723", "country_code": "US", "type": "healthcare", "lineage": ["https://openalex.org/I2801852214"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Wanpracha Chaovalitwongse", "raw_affiliation_strings": ["Department of Industrial and Systems Engineering and the Department of Radiology Medical Center, University of Washington, Seattle, WA, USA", "Dept. of Ind. & Syst. Eng., Univ. of Washington, Seattle, WA, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Industrial and Systems Engineering and the Department of Radiology Medical Center, University of Washington, Seattle, WA, USA", "institution_ids": ["https://openalex.org/I2801852214"]}, {"raw_affiliation_string": "Dept. of Ind. & Syst. Eng., Univ. of Washington, Seattle, WA, USA", "institution_ids": ["https://openalex.org/I201448701"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5084264842", "display_name": "Robert Babu\u0161ka", "orcid": "https://orcid.org/0000-0001-9578-8598"}, "institutions": [{"id": "https://openalex.org/I98358874", "display_name": "Delft University of Technology", "ror": "https://ror.org/02e2c7k09", "country_code": "NL", "type": "education", "lineage": ["https://openalex.org/I98358874"]}], "countries": ["NL"], "is_corresponding": false, "raw_author_name": "Robert Babuska", "raw_affiliation_strings": ["Delft Center for Systems and Control Faculty of Mechanical Engineering, Delft University of Technology, Delft, Netherlands", "Delft Center for Systems and Control, Delft University of Technology Delft, Netherlands"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Delft Center for Systems and Control Faculty of Mechanical Engineering, Delft University of Technology, Delft, Netherlands", "institution_ids": ["https://openalex.org/I98358874"]}, {"raw_affiliation_string": "Delft Center for Systems and Control, Delft University of Technology Delft, Netherlands", "institution_ids": ["https://openalex.org/I98358874"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 3, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 1.8639, "has_fulltext": false, "cited_by_count": 112, "citation_normalized_percentile": {"value": 0.84220727, "is_in_top_1_percent": false, "is_in_top_10_percent": false}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "42", "issue": "5", "first_page": "728", "last_page": "743"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10879", "display_name": "Robotic Locomotion and Control", "score": 0.9973999857902527, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10879", "display_name": "Robotic Locomotion and Control", "score": 0.9973999857902527, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9952999949455261, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10784", "display_name": "Muscle activation and electromyography studies", "score": 0.9932000041007996, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.7103503346443176}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6619281768798828}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.639912486076355}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.613789439201355}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.610645055770874}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5913946628570557}, {"id": "https://openalex.org/keywords/unsupervised-learning", "display_name": "Unsupervised learning", "score": 0.5346286296844482}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49944353103637695}, {"id": "https://openalex.org/keywords/robot-control", "display_name": "Robot control", "score": 0.4706805348396301}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.21999597549438477}], "concepts": [{"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.7103503346443176}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6619281768798828}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.639912486076355}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.613789439201355}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.610645055770874}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5913946628570557}, {"id": "https://openalex.org/C8038995", "wikidata": "https://www.wikidata.org/wiki/Q1152135", "display_name": "Unsupervised learning", "level": 2, "score": 0.5346286296844482}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49944353103637695}, {"id": "https://openalex.org/C65401140", "wikidata": "https://www.wikidata.org/wiki/Q7353385", "display_name": "Robot control", "level": 4, "score": 0.4706805348396301}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.21999597549438477}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1109/tsmcc.2012.2186565", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/tsmcc.2012.2186565", "pdf_url": null, "source": {"id": "https://openalex.org/S4210227557", "display_name": "IEEE Transactions on Systems Man and Cybernetics Part C (Applications and Reviews)", "issn_l": "1094-6977", "issn": ["1094-6977", "1558-2442"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [{"display_name": "Peace, Justice and strong institutions", "id": "https://metadata.un.org/sdg/16", "score": 0.7900000214576721}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 159, "referenced_works": ["https://openalex.org/W5472403", "https://openalex.org/W8452924", "https://openalex.org/W32403112", "https://openalex.org/W44140225", "https://openalex.org/W44204917", "https://openalex.org/W69586344", "https://openalex.org/W146900863", "https://openalex.org/W147649802", "https://openalex.org/W182652162", "https://openalex.org/W211269454", "https://openalex.org/W256201028", "https://openalex.org/W628274469", "https://openalex.org/W1485447451", "https://openalex.org/W1487434675", "https://openalex.org/W1504694836", "https://openalex.org/W1506281249", "https://openalex.org/W1509098796", "https://openalex.org/W1513477390", "https://openalex.org/W1540723801", "https://openalex.org/W1545908130", "https://openalex.org/W1548139318", "https://openalex.org/W1554219854", "https://openalex.org/W1563088657", "https://openalex.org/W1594031697", "https://openalex.org/W1595500933", "https://openalex.org/W1597173708", "https://openalex.org/W1599256750", "https://openalex.org/W1601384112", "https://openalex.org/W1639032689", "https://openalex.org/W1689445748", "https://openalex.org/W1904406446", "https://openalex.org/W1949974402", "https://openalex.org/W1958483735", "https://openalex.org/W1964571600", "https://openalex.org/W1967452515", "https://openalex.org/W1970511700", "https://openalex.org/W1973916798", "https://openalex.org/W1976606095", "https://openalex.org/W1988778891", "https://openalex.org/W1990368529", "https://openalex.org/W1993740947", "https://openalex.org/W1994923984", "https://openalex.org/W1995875735", "https://openalex.org/W2002803749", "https://openalex.org/W2025681463", "https://openalex.org/W2029058516", "https://openalex.org/W2031688164", "https://openalex.org/W2033689783", "https://openalex.org/W2037729465", "https://openalex.org/W2040657708", "https://openalex.org/W2045012281", "https://openalex.org/W2049287437", "https://openalex.org/W2050829396", "https://openalex.org/W2051620263", "https://openalex.org/W2059248226", "https://openalex.org/W2064585749", "https://openalex.org/W2066004688", "https://openalex.org/W2074176074", "https://openalex.org/W2085978624", "https://openalex.org/W2087592433", "https://openalex.org/W2096426448", "https://openalex.org/W2098378004", "https://openalex.org/W2098523767", "https://openalex.org/W2102988084", "https://openalex.org/W2104101793", "https://openalex.org/W2105780862", "https://openalex.org/W2109616287", "https://openalex.org/W2111527309", "https://openalex.org/W2111966261", "https://openalex.org/W2115834920", "https://openalex.org/W2120625568", "https://openalex.org/W2121863487", "https://openalex.org/W2122059228", "https://openalex.org/W2122701159", "https://openalex.org/W2124351082", "https://openalex.org/W2125055259", "https://openalex.org/W2125177287", "https://openalex.org/W2125612430", "https://openalex.org/W2127036885", "https://openalex.org/W2127107099", "https://openalex.org/W2127763473", "https://openalex.org/W2129897837", "https://openalex.org/W2130801532", "https://openalex.org/W2134030618", "https://openalex.org/W2135393140", "https://openalex.org/W2136278509", "https://openalex.org/W2137123963", "https://openalex.org/W2137290292", "https://openalex.org/W2139053308", "https://openalex.org/W2139212933", "https://openalex.org/W2140699235", "https://openalex.org/W2142196876", "https://openalex.org/W2142849822", "https://openalex.org/W2145467382", "https://openalex.org/W2145944877", "https://openalex.org/W2147613597", "https://openalex.org/W2149684865", "https://openalex.org/W2149706766", "https://openalex.org/W2150942036", "https://openalex.org/W2151182239", "https://openalex.org/W2153756422", "https://openalex.org/W2154642048", "https://openalex.org/W2155307968", "https://openalex.org/W2156737235", "https://openalex.org/W2158316397", "https://openalex.org/W2159559414", "https://openalex.org/W2160111545", "https://openalex.org/W2161960932", "https://openalex.org/W2163668399", "https://openalex.org/W2164936082", "https://openalex.org/W2165755010", "https://openalex.org/W2167804690", "https://openalex.org/W2168507184", "https://openalex.org/W2170642268", "https://openalex.org/W2171369025", "https://openalex.org/W2172993457", "https://openalex.org/W2176864480", "https://openalex.org/W2253954578", "https://openalex.org/W2337116699", "https://openalex.org/W2473732573", "https://openalex.org/W2478562489", "https://openalex.org/W2539571344", "https://openalex.org/W2766736793", "https://openalex.org/W2799061466", "https://openalex.org/W2904250082", "https://openalex.org/W2911678770", "https://openalex.org/W2914508454", "https://openalex.org/W2989068617", "https://openalex.org/W2993383518", "https://openalex.org/W3023540311", "https://openalex.org/W3085162807", "https://openalex.org/W4205687621", "https://openalex.org/W4214717370", "https://openalex.org/W4232335189", "https://openalex.org/W4236137412", "https://openalex.org/W4241087722", "https://openalex.org/W4246760750", "https://openalex.org/W4252510982", "https://openalex.org/W4255930413", "https://openalex.org/W4285719527", "https://openalex.org/W6600212477", "https://openalex.org/W6601750520", "https://openalex.org/W6601848611", "https://openalex.org/W6632597099", "https://openalex.org/W6632920007", "https://openalex.org/W6639845178", "https://openalex.org/W6672032040", "https://openalex.org/W6677236988", "https://openalex.org/W6678494045", "https://openalex.org/W6679257226", "https://openalex.org/W6680463366", "https://openalex.org/W6681802191", "https://openalex.org/W6682225842", "https://openalex.org/W6682304300", "https://openalex.org/W6682610290", "https://openalex.org/W6685191362", "https://openalex.org/W6685331038", "https://openalex.org/W6691942190", "https://openalex.org/W7062647797"], "related_works": ["https://openalex.org/W3203657119", "https://openalex.org/W4286952720", "https://openalex.org/W2930863966", "https://openalex.org/W2126211886", "https://openalex.org/W2171912896", "https://openalex.org/W3153786280", "https://openalex.org/W3127551068", "https://openalex.org/W2643884694", "https://openalex.org/W2165180011", "https://openalex.org/W4220829754"], "abstract_inverted_index": {"Over": [0], "the": [1, 21, 84, 112, 127], "past": [2], "decades,": [3], "machine": [4], "learning": [5, 26, 61, 86, 101, 119], "techniques,": [6], "such": [7, 48], "as": [8], "supervised": [9], "learning,": [10, 12, 15], "reinforcement": [11], "and": [13, 35, 42, 88, 97], "unsupervised": [14], "have": [16, 28, 63], "been": [17, 29], "increasingly": [18], "used": [19], "in": [20, 57, 126], "control": [22, 44, 70, 120], "engineering": [23], "community.": [24], "Various": [25], "algorithms": [27, 87], "developed": [30], "to": [31, 67, 91], "achieve": [32], "autonomous": [33], "operation": [34], "intelligent": [36], "decision": [37], "making": [38], "for": [39, 72, 115], "many": [40], "complex": [41], "challenging": [43], "problems.": [45], "One": [46], "of": [47, 80, 99, 109, 121], "problems": [49], "is": [50], "bipedal": [51, 73, 92, 122], "walking": [52], "robot": [53, 93], "control.": [54, 94], "Although": [55], "still": [56], "their": [58, 89], "early": [59], "stages,": [60], "techniques": [62, 102], "demonstrated": [64], "promising": [65], "potential": [66], "build": [68], "adaptive": [69], "systems": [71], "robots.": [74], "This": [75], "paper": [76], "gives": [77], "a": [78, 106], "review": [79], "recent": [81], "advances": [82], "on": [83, 118], "state-of-the-art": [85], "applications": [90], "The": [95], "effects": [96], "limitations": [98], "different": [100], "are": [103, 124], "discussed": [104], "through": [105], "representative": [107], "selection": [108], "examples": [110], "from": [111], "literature.": [113], "Guidelines": [114], "future": [116], "research": [117], "robots": [123], "provided": [125], "end.": [128]}, "counts_by_year": [{"year": 2026, "cited_by_count": 7}, {"year": 2025, "cited_by_count": 8}, {"year": 2024, "cited_by_count": 13}, {"year": 2023, "cited_by_count": 10}, {"year": 2022, "cited_by_count": 11}, {"year": 2021, "cited_by_count": 12}, {"year": 2020, "cited_by_count": 13}, {"year": 2019, "cited_by_count": 16}, {"year": 2018, "cited_by_count": 7}, {"year": 2017, "cited_by_count": 4}, {"year": 2016, "cited_by_count": 2}, {"year": 2015, "cited_by_count": 5}, {"year": 2014, "cited_by_count": 3}, {"year": 2013, "cited_by_count": 1}], "updated_date": "2026-06-11T09:08:48.828518", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4386718967", "doi": "https://doi.org/10.1126/scirobotics.adg1462", "title": "Reaching the limit in autonomous racing: Optimal control versus reinforcement learning", "display_name": "Reaching the limit in autonomous racing: Optimal control versus reinforcement learning", "relevance_score": 240.36598, "publication_year": 2023, "publication_date": "2023-09-13", "ids": {"openalex": "https://openalex.org/W4386718967", "doi": "https://doi.org/10.1126/scirobotics.adg1462", "pmid": "https://pubmed.ncbi.nlm.nih.gov/37703383"}, "language": "en", "primary_location": {"id": "doi:10.1126/scirobotics.adg1462", "is_oa": false, "landing_page_url": "https://doi.org/10.1126/scirobotics.adg1462", "pdf_url": null, "source": {"id": "https://openalex.org/S4210213233", "display_name": "Science Robotics", "issn_l": "2470-9476", "issn": ["2470-9476"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310315823", "host_organization_name": "American Association for the Advancement of Science", "host_organization_lineage": ["https://openalex.org/P4310315823"], "host_organization_lineage_names": ["American Association for the Advancement of Science"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Science Robotics", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["arxiv", "crossref", "datacite", "pubmed"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": null, "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5085971429", "display_name": "Yunlong Song", "orcid": "https://orcid.org/0000-0002-6352-3744"}, "institutions": [{"id": "https://openalex.org/I202697423", "display_name": "University of Zurich", "ror": "https://ror.org/02crff812", "country_code": "CH", "type": "education", "lineage": ["https://openalex.org/I202697423"]}], "countries": ["CH"], "is_corresponding": true, "raw_author_name": "Yunlong Song", "raw_affiliation_strings": ["University of Zurich, Zurich, Switzerland"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Zurich, Zurich, Switzerland", "institution_ids": ["https://openalex.org/I202697423"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5013089045", "display_name": "\u00c1ngel Romero", "orcid": "https://orcid.org/0000-0002-7977-7802"}, "institutions": [{"id": "https://openalex.org/I202697423", "display_name": "University of Zurich", "ror": "https://ror.org/02crff812", "country_code": "CH", "type": "education", "lineage": ["https://openalex.org/I202697423"]}], "countries": ["CH"], "is_corresponding": false, "raw_author_name": "Angel Romero", "raw_affiliation_strings": ["University of Zurich, Zurich, Switzerland"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Zurich, Zurich, Switzerland", "institution_ids": ["https://openalex.org/I202697423"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5056245846", "display_name": "Matthias M. M\u00fcller", "orcid": "https://orcid.org/0000-0002-1428-9680"}, "institutions": [{"id": "https://openalex.org/I1343180700", "display_name": "Intel (United States)", "ror": "https://ror.org/01ek73717", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1343180700"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Matthias M\u00fcller", "raw_affiliation_strings": ["Intel Labs, Jackson, WY, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Intel Labs, Jackson, WY, USA", "institution_ids": ["https://openalex.org/I1343180700"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5006181255", "display_name": "Vladlen Koltun", "orcid": "https://orcid.org/0000-0003-0858-0970"}, "institutions": [{"id": "https://openalex.org/I1343180700", "display_name": "Intel (United States)", "ror": "https://ror.org/01ek73717", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1343180700"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Vladlen Koltun", "raw_affiliation_strings": ["Intel Labs, Jackson, WY, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Intel Labs, Jackson, WY, USA", "institution_ids": ["https://openalex.org/I1343180700"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5057116316", "display_name": "Davide Scaramuzza", "orcid": "https://orcid.org/0000-0002-3831-6778"}, "institutions": [{"id": "https://openalex.org/I202697423", "display_name": "University of Zurich", "ror": "https://ror.org/02crff812", "country_code": "CH", "type": "education", "lineage": ["https://openalex.org/I202697423"]}], "countries": ["CH"], "is_corresponding": false, "raw_author_name": "Davide Scaramuzza", "raw_affiliation_strings": ["University of Zurich, Zurich, Switzerland"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Zurich, Zurich, Switzerland", "institution_ids": ["https://openalex.org/I202697423"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5085971429"], "corresponding_institution_ids": ["https://openalex.org/I202697423"], "apc_list": null, "apc_paid": null, "fwci": 31.3333, "has_fulltext": false, "cited_by_count": 196, "citation_normalized_percentile": {"value": 0.99790436, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 98, "max": 100}, "biblio": {"volume": "8", "issue": "82", "first_page": "eadg1462", "last_page": "eadg1462"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994000196456909, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994000196456909, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9976999759674072, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10586", "display_name": "Robotic Path Planning Algorithms", "score": 0.9957000017166138, "subfield": {"id": "https://openalex.org/subfields/1707", "display_name": "Computer Vision and Pattern Recognition"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8075485229492188}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.6726484298706055}, {"id": "https://openalex.org/keywords/agile-software-development", "display_name": "Agile software development", "score": 0.5981022119522095}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5875877141952515}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5793664455413818}, {"id": "https://openalex.org/keywords/controller", "display_name": "Controller (irrigation)", "score": 0.5740841627120972}, {"id": "https://openalex.org/keywords/leverage", "display_name": "Leverage (statistics)", "score": 0.5119867920875549}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5014400482177734}, {"id": "https://openalex.org/keywords/trajectory", "display_name": "Trajectory", "score": 0.4141003489494324}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.39349836111068726}, {"id": "https://openalex.org/keywords/control-theory", "display_name": "Control theory (sociology)", "score": 0.3816871643066406}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.275588721036911}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.2542179226875305}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8075485229492188}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.6726484298706055}, {"id": "https://openalex.org/C14185376", "wikidata": "https://www.wikidata.org/wiki/Q30232", "display_name": "Agile software development", "level": 2, "score": 0.5981022119522095}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5875877141952515}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5793664455413818}, {"id": "https://openalex.org/C203479927", "wikidata": "https://www.wikidata.org/wiki/Q5165939", "display_name": "Controller (irrigation)", "level": 2, "score": 0.5740841627120972}, {"id": "https://openalex.org/C153083717", "wikidata": "https://www.wikidata.org/wiki/Q6535263", "display_name": "Leverage (statistics)", "level": 2, "score": 0.5119867920875549}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5014400482177734}, {"id": "https://openalex.org/C13662910", "wikidata": "https://www.wikidata.org/wiki/Q193139", "display_name": "Trajectory", "level": 2, "score": 0.4141003489494324}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.39349836111068726}, {"id": "https://openalex.org/C47446073", "wikidata": "https://www.wikidata.org/wiki/Q5165890", "display_name": "Control theory (sociology)", "level": 3, "score": 0.3816871643066406}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.275588721036911}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.2542179226875305}, {"id": "https://openalex.org/C6557445", "wikidata": "https://www.wikidata.org/wiki/Q173113", "display_name": "Agronomy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C1276947", "wikidata": "https://www.wikidata.org/wiki/Q333", "display_name": "Astronomy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C115903868", "wikidata": "https://www.wikidata.org/wiki/Q80993", "display_name": "Software engineering", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 5, "locations": [{"id": "doi:10.1126/scirobotics.adg1462", "is_oa": false, "landing_page_url": "https://doi.org/10.1126/scirobotics.adg1462", "pdf_url": null, "source": {"id": "https://openalex.org/S4210213233", "display_name": "Science Robotics", "issn_l": "2470-9476", "issn": ["2470-9476"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310315823", "host_organization_name": "American Association for the Advancement of Science", "host_organization_lineage": ["https://openalex.org/P4310315823"], "host_organization_lineage_names": ["American Association for the Advancement of Science"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Science Robotics", "raw_type": "journal-article"}, {"id": "pmid:37703383", "is_oa": false, "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/37703383", "pdf_url": null, "source": {"id": "https://openalex.org/S4306525036", "display_name": "PubMed", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1299303238", "host_organization_name": "National Institutes of Health", "host_organization_lineage": ["https://openalex.org/I1299303238"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Science robotics", "raw_type": null}, {"id": "pmh:oai:www.zora.uzh.ch:257394", "is_oa": true, "landing_page_url": null, "pdf_url": null, "source": {"id": "https://openalex.org/S4306401281", "display_name": "Zurich Open Repository and Archive (University of Zurich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I202697423", "host_organization_name": "University of Zurich", "host_organization_lineage": ["https://openalex.org/I202697423"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "acceptedVersion", "is_accepted": true, "is_published": false, "raw_source_name": "Song, Yunlong; Romero, Angel; M\u00fcller, Matthias; Koltun, Vladlen; Scaramuzza, Davide  (2023). Reaching the limit in autonomous racing: Optimal control versus reinforcement learning.  Science Robotics, 8(82):adg1462.", "raw_type": "info:eu-repo/semantics/article"}, {"id": "pmh:oai:arXiv.org:2310.10943", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/2310.10943", "pdf_url": "https://arxiv.org/pdf/2310.10943", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, {"id": "doi:10.5167/uzh-257394", "is_oa": true, "landing_page_url": "https://doi.org/10.5167/uzh-257394", "pdf_url": null, "source": {"id": "https://openalex.org/S7407051291", "display_name": "Universit\u00e4t Z\u00fcrich, ZORA", "issn_l": null, "issn": [], "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "JournalArticle"}], "best_oa_location": {"id": "pmh:oai:www.zora.uzh.ch:257394", "is_oa": true, "landing_page_url": null, "pdf_url": null, "source": {"id": "https://openalex.org/S4306401281", "display_name": "Zurich Open Repository and Archive (University of Zurich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I202697423", "host_organization_name": "University of Zurich", "host_organization_lineage": ["https://openalex.org/I202697423"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "acceptedVersion", "is_accepted": true, "is_published": false, "raw_source_name": "Song, Yunlong; Romero, Angel; M\u00fcller, Matthias; Koltun, Vladlen; Scaramuzza, Davide  (2023). Reaching the limit in autonomous racing: Optimal control versus reinforcement learning.  Science Robotics, 8(82):adg1462.", "raw_type": "info:eu-repo/semantics/article"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 46, "referenced_works": ["https://openalex.org/W1846830349", "https://openalex.org/W1945123189", "https://openalex.org/W2105080564", "https://openalex.org/W2142424817", "https://openalex.org/W2145339207", "https://openalex.org/W2162991084", "https://openalex.org/W2257979135", "https://openalex.org/W2343568200", "https://openalex.org/W2418368699", "https://openalex.org/W2482392012", "https://openalex.org/W2491934829", "https://openalex.org/W2510089447", "https://openalex.org/W2654286404", "https://openalex.org/W2733312032", "https://openalex.org/W2771691050", "https://openalex.org/W2909553221", "https://openalex.org/W2911956173", "https://openalex.org/W2923554444", "https://openalex.org/W2962890638", "https://openalex.org/W2963689432", "https://openalex.org/W2963796870", "https://openalex.org/W2965215408", "https://openalex.org/W2982095652", "https://openalex.org/W2982316857", "https://openalex.org/W3040334549", "https://openalex.org/W3091903741", "https://openalex.org/W3093922502", "https://openalex.org/W3102552342", "https://openalex.org/W3104730803", "https://openalex.org/W3104876774", "https://openalex.org/W3107153805", "https://openalex.org/W3155272911", "https://openalex.org/W3177010373", "https://openalex.org/W3185165122", "https://openalex.org/W3216772467", "https://openalex.org/W4205430897", "https://openalex.org/W4206742276", "https://openalex.org/W4213377513", "https://openalex.org/W4283170591", "https://openalex.org/W4283262038", "https://openalex.org/W4285102199", "https://openalex.org/W4285163488", "https://openalex.org/W4293160779", "https://openalex.org/W4298427820", "https://openalex.org/W4299573229", "https://openalex.org/W4386285856"], "related_works": ["https://openalex.org/W2656997359", "https://openalex.org/W4306904969", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W1941703695", "https://openalex.org/W4380318855", "https://openalex.org/W2031695474", "https://openalex.org/W2024136090", "https://openalex.org/W3131574667", "https://openalex.org/W2768698792"], "abstract_inverted_index": {"A": [0], "central": [1], "question": [2, 21], "in": [3, 48, 217, 230], "robotics": [4, 219], "is": [5, 79], "how": [6], "to": [7, 59, 131, 153, 170, 175], "design": [8], "a": [9, 25, 34, 91, 109, 145, 180, 191, 209, 215], "control": [10, 45, 101, 133, 164, 203], "system": [11], "for": [12], "an": [13, 103, 114, 172], "agile": [14, 173, 218], "mobile": [15], "robot.": [16], "This": [17, 116, 212], "paper": [18], "studies": [19], "this": [20, 49], "systematically,": [22], "focusing": [23], "on": [24, 208, 223], "challenging": [26], "setting:": [27], "autonomous": [28], "drone": [29, 174], "racing.": [30], "We": [31, 51], "show": [32], "that": [33, 71, 81, 88, 111, 123], "neural": [35], "network": [36], "controller": [37], "trained": [38], "with": [39, 102, 155], "reinforcement": [40], "learning": [41], "(RL)": [42], "outperformed": [43], "optimal": [44], "(OC)": [46], "methods": [47], "setting.": [50], "then": [52], "investigated": [53], "which": [54], "fundamental": [55, 73], "factors": [56], "have": [57, 65], "contributed": [58], "the": [60, 72, 96, 119, 128, 159, 187, 224], "success": [61], "of": [62, 75, 121, 161, 194, 206, 226], "RL": [63, 76, 141, 227], "or": [64], "limited": [66], "OC.": [67], "Our": [68, 166, 199], "study": [69], "indicates": [70], "advantage": [74], "over": [77], "OC": [78, 94, 229], "not": [80], "it": [82, 89], "optimizes": [83, 90], "its": [84, 176], "objective": [85, 147], "better": [86, 92], "but": [87], "objective.": [93], "decomposes": [95], "problem": [97], "into": [98], "planning": [99], "and": [100, 148, 190, 220, 228], "explicit": [104], "intermediate": [105], "representation,": [106], "such": [107], "as": [108, 113], "trajectory,": [110], "serves": [112], "interface.": [115], "decomposition": [117], "limits": [118], "range": [120], "behaviors": [122], "can": [124, 142, 149], "be": [125], "expressed": [126], "by": [127], "controller,": [129], "leading": [130], "inferior": [132], "performance": [134], "when": [135], "facing": [136], "unmodeled": [137], "effects.": [138], "In": [139], "contrast,": [140], "directly": [143], "optimize": [144], "task-level": [146], "leverage": [150], "domain": [151], "randomization": [152], "cope": [154], "model": [156], "uncertainty,": [157], "allowing": [158], "discovery": [160], "more": [162], "robust": [163], "responses.": [165], "findings": [167], "allowed": [168], "us": [169], "push": [171], "maximum": [177], "performance,": [178], "achieving": [179], "peak": [181, 192], "acceleration": [182, 189], "greater": [183], "than": [184], "12": [185], "times": [186], "gravitational": [188], "velocity": [193], "108": [195], "kilometers": [196], "per": [197], "hour.": [198], "policy": [200], "achieved": [201], "superhuman": [202], "within": [204], "minutes": [205], "training": [207], "standard": [210], "workstation.": [211], "work": [213], "presents": [214], "milestone": [216], "sheds": [221], "light": [222], "role": [225], "robot": [231], "control.": [232]}, "counts_by_year": [{"year": 2026, "cited_by_count": 28}, {"year": 2025, "cited_by_count": 100}, {"year": 2024, "cited_by_count": 62}, {"year": 2023, "cited_by_count": 6}], "updated_date": "2026-07-03T08:13:44.112507", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2787666871", "doi": "https://doi.org/10.1016/j.neunet.2018.07.006", "title": "State representation learning for control: An overview", "display_name": "State representation learning for control: An overview", "relevance_score": 239.41261, "publication_year": 2018, "publication_date": "2018-08-04", "ids": {"openalex": "https://openalex.org/W2787666871", "doi": "https://doi.org/10.1016/j.neunet.2018.07.006", "mag": "2787666871", "pmid": "https://pubmed.ncbi.nlm.nih.gov/30268059"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.neunet.2018.07.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neunet.2018.07.006", "pdf_url": null, "source": {"id": "https://openalex.org/S123019304", "display_name": "Neural Networks", "issn_l": "0893-6080", "issn": ["0893-6080", "1879-2782"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural Networks", "raw_type": "journal-article"}, "type": "review", "indexed_in": ["arxiv", "crossref", "pubmed"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1802.04181", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5006026661", "display_name": "Timoth\u00e9e Lesort", "orcid": "https://orcid.org/0000-0002-8669-0764"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}, {"id": "https://openalex.org/I4210140930", "display_name": "Thales (France)", "ror": "https://ror.org/04emwm605", "country_code": "FR", "type": "company", "lineage": ["https://openalex.org/I4210140930"]}], "countries": ["FR"], "is_corresponding": false, "raw_author_name": "Timoth\u00e9e Lesort", "raw_affiliation_strings": ["Vision Lab, Thales, Theresis, Palaiseau, France; U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: timothee.lesort@thalesgroup.com", "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Vision Lab, Thales, Theresis, Palaiseau, France; U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: timothee.lesort@thalesgroup.com", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "institution_ids": ["https://openalex.org/I4210140930"]}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5058176171", "display_name": "Natalia D\u00edaz-Rodr\u00edguez", "orcid": "https://orcid.org/0000-0003-3362-9326"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}], "countries": ["FR"], "is_corresponding": false, "raw_author_name": "Natalia D\u00edaz-Rodr\u00edguez", "raw_affiliation_strings": ["U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: natalia.diaz@ensta-paristech.fr", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)"], "raw_orcid": "https://orcid.org/0000-0003-3362-9326", "affiliations": [{"raw_affiliation_string": "U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: natalia.diaz@ensta-paristech.fr", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5112346289", "display_name": "Jean-Frano\u0327is Goudou", "orcid": null}, "institutions": [{"id": "https://openalex.org/I2801356230", "display_name": "Thales (Australia)", "ror": "https://ror.org/00f7vya03", "country_code": "AU", "type": "company", "lineage": ["https://openalex.org/I2801356230", "https://openalex.org/I4210140930"]}, {"id": "https://openalex.org/I4210140930", "display_name": "Thales (France)", "ror": "https://ror.org/04emwm605", "country_code": "FR", "type": "company", "lineage": ["https://openalex.org/I4210140930"]}], "countries": ["AU", "FR"], "is_corresponding": false, "raw_author_name": "Jean-Frano\u0327is Goudou", "raw_affiliation_strings": ["Vision Lab, Thales, Theresis, Palaiseau, France. Electronic address: jean-francois.goudou@thalesgroup.com", "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Vision Lab, Thales, Theresis, Palaiseau, France. Electronic address: jean-francois.goudou@thalesgroup.com", "institution_ids": ["https://openalex.org/I2801356230"]}, {"raw_affiliation_string": "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "institution_ids": ["https://openalex.org/I4210140930"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5027931470", "display_name": "David Filliat", "orcid": "https://orcid.org/0000-0002-5739-1618"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}], "countries": ["FR"], "is_corresponding": false, "raw_author_name": "David Filliat", "raw_affiliation_strings": ["U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: david.filliat@ensta.fr", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: david.filliat@ensta.fr", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 3, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": {"value": 3350, "currency": "USD", "value_usd": 3350}, "apc_paid": null, "fwci": 24.3828, "has_fulltext": false, "cited_by_count": 297, "citation_normalized_percentile": {"value": 0.99532803, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "108", "issue": null, "first_page": "379", "last_page": "392"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9976999759674072, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9976999759674072, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9930999875068665, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11307", "display_name": "Domain Adaptation and Few-Shot Learning", "score": 0.9909999966621399, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.7435513734817505}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7278826832771301}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7181282043457031}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.7168611288070679}, {"id": "https://openalex.org/keywords/curse-of-dimensionality", "display_name": "Curse of dimensionality", "score": 0.6658106446266174}, {"id": "https://openalex.org/keywords/feature-learning", "display_name": "Feature learning", "score": 0.5952423810958862}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5883178114891052}, {"id": "https://openalex.org/keywords/dimension", "display_name": "Dimension (graph theory)", "score": 0.5524753332138062}, {"id": "https://openalex.org/keywords/external-data-representation", "display_name": "External Data Representation", "score": 0.511781632900238}, {"id": "https://openalex.org/keywords/state", "display_name": "State (computer science)", "score": 0.47414276003837585}, {"id": "https://openalex.org/keywords/implementation", "display_name": "Implementation", "score": 0.47349199652671814}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4460316300392151}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.4317273795604706}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.2498774528503418}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.08870789408683777}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.0863552987575531}, {"id": "https://openalex.org/keywords/software-engineering", "display_name": "Software engineering", "score": 0.0806734561920166}], "concepts": [{"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.7435513734817505}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7278826832771301}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7181282043457031}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.7168611288070679}, {"id": "https://openalex.org/C111030470", "wikidata": "https://www.wikidata.org/wiki/Q1430460", "display_name": "Curse of dimensionality", "level": 2, "score": 0.6658106446266174}, {"id": "https://openalex.org/C59404180", "wikidata": "https://www.wikidata.org/wiki/Q17013334", "display_name": "Feature learning", "level": 2, "score": 0.5952423810958862}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5883178114891052}, {"id": "https://openalex.org/C33676613", "wikidata": "https://www.wikidata.org/wiki/Q13415176", "display_name": "Dimension (graph theory)", "level": 2, "score": 0.5524753332138062}, {"id": "https://openalex.org/C116409475", "wikidata": "https://www.wikidata.org/wiki/Q1385056", "display_name": "External Data Representation", "level": 2, "score": 0.511781632900238}, {"id": "https://openalex.org/C48103436", "wikidata": "https://www.wikidata.org/wiki/Q599031", "display_name": "State (computer science)", "level": 2, "score": 0.47414276003837585}, {"id": "https://openalex.org/C26713055", "wikidata": "https://www.wikidata.org/wiki/Q245962", "display_name": "Implementation", "level": 2, "score": 0.47349199652671814}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4460316300392151}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.4317273795604706}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.2498774528503418}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.08870789408683777}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.0863552987575531}, {"id": "https://openalex.org/C115903868", "wikidata": "https://www.wikidata.org/wiki/Q80993", "display_name": "Software engineering", "level": 1, "score": 0.0806734561920166}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [{"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}], "locations_count": 5, "locations": [{"id": "doi:10.1016/j.neunet.2018.07.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neunet.2018.07.006", "pdf_url": null, "source": {"id": "https://openalex.org/S123019304", "display_name": "Neural Networks", "issn_l": "0893-6080", "issn": ["0893-6080", "1879-2782"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural Networks", "raw_type": "journal-article"}, {"id": "pmid:30268059", "is_oa": false, "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/30268059", "pdf_url": null, "source": {"id": "https://openalex.org/S4306525036", "display_name": "PubMed", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1299303238", "host_organization_name": "National Institutes of Health", "host_organization_lineage": ["https://openalex.org/I1299303238"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural networks : the official journal of the International Neural Network Society", "raw_type": null}, {"id": "pmh:oai:arXiv.org:1802.04181", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1802.04181", "pdf_url": "https://arxiv.org/pdf/1802.04181", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, {"id": "pmh:oai:HAL:hal-01858558v1", "is_oa": true, "landing_page_url": "https://hal.science/hal-01858558", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402512", "display_name": "HAL (Le Centre pour la Communication Scientifique Directe)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1294671590", "host_organization_name": "Centre National de la Recherche Scientifique", "host_organization_lineage": ["https://openalex.org/I1294671590"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Neural Networks, 2018, 108, pp.379-392. &#x27E8;10.1016/j.neunet.2018.07.006&#x27E9;", "raw_type": "info:eu-repo/semantics/article"}, {"id": "pmh:oai:digibug.ugr.es:10481/88347", "is_oa": true, "landing_page_url": "https://hdl.handle.net/10481/88347", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400567", "display_name": "Institutional Repository of the University of Granada (University of Granada)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I173304897", "host_organization_name": "Universidad de Granada", "host_organization_lineage": ["https://openalex.org/I173304897"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "info:eu-repo/semantics/article"}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1802.04181", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1802.04181", "pdf_url": "https://arxiv.org/pdf/1802.04181", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, "sustainable_development_goals": [{"display_name": "Peace, Justice and strong institutions", "id": "https://metadata.un.org/sdg/16", "score": 0.5099999904632568}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 139, "referenced_works": ["https://openalex.org/W60493759", "https://openalex.org/W199564175", "https://openalex.org/W834081922", "https://openalex.org/W1164749991", "https://openalex.org/W1515851193", "https://openalex.org/W1530239281", "https://openalex.org/W1571401318", "https://openalex.org/W1580389772", "https://openalex.org/W1583837637", "https://openalex.org/W1591713425", "https://openalex.org/W1625577255", "https://openalex.org/W1898476659", "https://openalex.org/W1909320841", "https://openalex.org/W1957496711", "https://openalex.org/W1959608418", "https://openalex.org/W1990693003", "https://openalex.org/W2002563186", "https://openalex.org/W2025768430", "https://openalex.org/W2061478368", "https://openalex.org/W2085627234", "https://openalex.org/W2094024286", "https://openalex.org/W2099471712", "https://openalex.org/W2101524054", "https://openalex.org/W2115668428", "https://openalex.org/W2136922672", "https://openalex.org/W2140135625", "https://openalex.org/W2145094598", "https://openalex.org/W2145339207", "https://openalex.org/W2146444479", "https://openalex.org/W2150468603", "https://openalex.org/W2157364932", "https://openalex.org/W2159008857", "https://openalex.org/W2173248099", "https://openalex.org/W2177066871", "https://openalex.org/W2179072710", "https://openalex.org/W2210483910", "https://openalex.org/W2260158983", "https://openalex.org/W2337977475", "https://openalex.org/W2338684808", "https://openalex.org/W2396178844", "https://openalex.org/W2400532028", "https://openalex.org/W2411541852", "https://openalex.org/W2412320034", "https://openalex.org/W2426267443", "https://openalex.org/W2434741482", "https://openalex.org/W2442341664", "https://openalex.org/W2473208550", "https://openalex.org/W2493343568", "https://openalex.org/W2534725218", "https://openalex.org/W2548275288", "https://openalex.org/W2559655401", "https://openalex.org/W2563830277", "https://openalex.org/W2567455162", "https://openalex.org/W2570734388", "https://openalex.org/W2609990452", "https://openalex.org/W2613677041", "https://openalex.org/W2614839826", "https://openalex.org/W2619484182", "https://openalex.org/W2735796404", "https://openalex.org/W2739083961", "https://openalex.org/W2751258126", "https://openalex.org/W2753738274", "https://openalex.org/W2754517384", "https://openalex.org/W2754695199", "https://openalex.org/W2758237641", "https://openalex.org/W2767624189", "https://openalex.org/W2775192669", "https://openalex.org/W2779977383", "https://openalex.org/W2781585732", "https://openalex.org/W2786917922", "https://openalex.org/W2799245749", "https://openalex.org/W2903420114", "https://openalex.org/W2949296282", "https://openalex.org/W2951004968", "https://openalex.org/W2951751411", "https://openalex.org/W2962808998", "https://openalex.org/W2962899390", "https://openalex.org/W2963009616", "https://openalex.org/W2963226019", "https://openalex.org/W2963305465", "https://openalex.org/W2963375351", "https://openalex.org/W2963430173", "https://openalex.org/W2963523627", "https://openalex.org/W2963634205", "https://openalex.org/W2963826370", "https://openalex.org/W2963864421", "https://openalex.org/W2963979925", "https://openalex.org/W2964112890", "https://openalex.org/W2964282857", "https://openalex.org/W2964329252", "https://openalex.org/W2990138404", "https://openalex.org/W2997574889", "https://openalex.org/W3037207827", "https://openalex.org/W3037211759", "https://openalex.org/W3093010610", "https://openalex.org/W3103780890", "https://openalex.org/W3105287169", "https://openalex.org/W3141952980", "https://openalex.org/W4214717370", "https://openalex.org/W4294226150", "https://openalex.org/W4294568686", "https://openalex.org/W4297744728", "https://openalex.org/W4297789121", "https://openalex.org/W4297809330", "https://openalex.org/W4299408792", "https://openalex.org/W4319988532", "https://openalex.org/W4320013936", "https://openalex.org/W6608171802", "https://openalex.org/W6623316541", "https://openalex.org/W6631821572", "https://openalex.org/W6635351661", "https://openalex.org/W6638286568", "https://openalex.org/W6640963894", "https://openalex.org/W6650972816", "https://openalex.org/W6657028776", "https://openalex.org/W6672056806", "https://openalex.org/W6680657880", "https://openalex.org/W6681096077", "https://openalex.org/W6683354640", "https://openalex.org/W6683390034", "https://openalex.org/W6685152286", "https://openalex.org/W6685755111", "https://openalex.org/W6688347243", "https://openalex.org/W6692963853", "https://openalex.org/W6701573534", "https://openalex.org/W6715501732", "https://openalex.org/W6718140377", "https://openalex.org/W6718991148", "https://openalex.org/W6731862918", "https://openalex.org/W6738087714", "https://openalex.org/W6740836278", "https://openalex.org/W6743541709", "https://openalex.org/W6744123322", "https://openalex.org/W6747082033", "https://openalex.org/W6747387971", "https://openalex.org/W6750253780", "https://openalex.org/W6750453880", "https://openalex.org/W6780225908", "https://openalex.org/W6780559895"], "related_works": ["https://openalex.org/W2952512863", "https://openalex.org/W3097774776", "https://openalex.org/W2187269125", "https://openalex.org/W3134504629", "https://openalex.org/W2938696877", "https://openalex.org/W4323911413", "https://openalex.org/W1982536061", "https://openalex.org/W4210631502", "https://openalex.org/W4286796787", "https://openalex.org/W2952582877"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 9}, {"year": 2025, "cited_by_count": 23}, {"year": 2024, "cited_by_count": 46}, {"year": 2023, "cited_by_count": 33}, {"year": 2022, "cited_by_count": 37}, {"year": 2021, "cited_by_count": 50}, {"year": 2020, "cited_by_count": 52}, {"year": 2019, "cited_by_count": 34}, {"year": 2018, "cited_by_count": 13}], "updated_date": "2026-07-03T08:13:44.112507", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3042045548", "doi": "https://doi.org/10.1007/s13042-020-01167-7", "title": "Multi-agent reinforcement learning for redundant robot control in task-space", "display_name": "Multi-agent reinforcement learning for redundant robot control in task-space", "relevance_score": 236.1065, "publication_year": 2020, "publication_date": "2020-07-09", "ids": {"openalex": "https://openalex.org/W3042045548", "doi": "https://doi.org/10.1007/s13042-020-01167-7", "mag": "3042045548"}, "language": "en", "primary_location": {"id": "doi:10.1007/s13042-020-01167-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s13042-020-01167-7", "pdf_url": null, "source": {"id": "https://openalex.org/S2764999920", "display_name": "International Journal of Machine Learning and Cybernetics", "issn_l": "1868-8071", "issn": ["1868-8071", "1868-808X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "International Journal of Machine Learning and Cybernetics", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5065156490", "display_name": "Adolfo Perrusqu\u00eda", "orcid": "https://orcid.org/0000-0003-2290-1160"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Centro de Investigaci\u00f3n y de Estudios Avanzados del Instituto Polit\u00e9cnico Nacional", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": false, "raw_author_name": "Adolfo Perrusqu\u00eda", "raw_affiliation_strings": ["Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5008201587", "display_name": "Wen Yu", "orcid": "https://orcid.org/0000-0002-9540-7924"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Centro de Investigaci\u00f3n y de Estudios Avanzados del Instituto Polit\u00e9cnico Nacional", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": true, "raw_author_name": "Wen Yu", "raw_affiliation_strings": ["Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5100683740", "display_name": "Xiaoou Li", "orcid": "https://orcid.org/0000-0003-3087-7375"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Centro de Investigaci\u00f3n y de Estudios Avanzados del Instituto Polit\u00e9cnico Nacional", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": false, "raw_author_name": "Xiaoou Li", "raw_affiliation_strings": ["Departamento de Computaci\u00f3n, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Computaci\u00f3n, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 3, "corresponding_author_ids": ["https://openalex.org/A5008201587"], "corresponding_institution_ids": ["https://openalex.org/I68368234"], "apc_list": {"value": 2790, "currency": "EUR", "value_usd": 3590}, "apc_paid": null, "fwci": 6.2658, "has_fulltext": false, "cited_by_count": 81, "citation_normalized_percentile": {"value": 0.97082334, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 100}, "biblio": {"volume": "12", "issue": "1", "first_page": "231", "last_page": "241"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9965999722480774, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10040", "display_name": "Adaptive Control of Nonlinear Systems", "score": 0.9860000014305115, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/jacobian-matrix-and-determinant", "display_name": "Jacobian matrix and determinant", "score": 0.7715538144111633}, {"id": "https://openalex.org/keywords/inverse-kinematics", "display_name": "Inverse kinematics", "score": 0.7142828702926636}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7079681158065796}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5996822714805603}, {"id": "https://openalex.org/keywords/kinematics", "display_name": "Kinematics", "score": 0.5811013579368591}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.5181699991226196}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.4868899881839752}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4809914827346802}, {"id": "https://openalex.org/keywords/robot-kinematics", "display_name": "Robot kinematics", "score": 0.4620141088962555}, {"id": "https://openalex.org/keywords/degrees-of-freedom", "display_name": "Degrees of freedom (physics and chemistry)", "score": 0.43239620327949524}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.2401578426361084}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.11279842257499695}], "concepts": [{"id": "https://openalex.org/C200331156", "wikidata": "https://www.wikidata.org/wiki/Q506041", "display_name": "Jacobian matrix and determinant", "level": 2, "score": 0.7715538144111633}, {"id": "https://openalex.org/C17816587", "wikidata": "https://www.wikidata.org/wiki/Q1501872", "display_name": "Inverse kinematics", "level": 3, "score": 0.7142828702926636}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7079681158065796}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5996822714805603}, {"id": "https://openalex.org/C39920418", "wikidata": "https://www.wikidata.org/wiki/Q11476", "display_name": "Kinematics", "level": 2, "score": 0.5811013579368591}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.5181699991226196}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.4868899881839752}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4809914827346802}, {"id": "https://openalex.org/C74222875", "wikidata": "https://www.wikidata.org/wiki/Q16000312", "display_name": "Robot kinematics", "level": 4, "score": 0.4620141088962555}, {"id": "https://openalex.org/C208081375", "wikidata": "https://www.wikidata.org/wiki/Q274502", "display_name": "Degrees of freedom (physics and chemistry)", "level": 2, "score": 0.43239620327949524}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.2401578426361084}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.11279842257499695}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C62520636", "wikidata": "https://www.wikidata.org/wiki/Q944", "display_name": "Quantum mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C74650414", "wikidata": "https://www.wikidata.org/wiki/Q11397", "display_name": "Classical mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C28826006", "wikidata": "https://www.wikidata.org/wiki/Q33521", "display_name": "Applied mathematics", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/s13042-020-01167-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s13042-020-01167-7", "pdf_url": null, "source": {"id": "https://openalex.org/S2764999920", "display_name": "International Journal of Machine Learning and Cybernetics", "issn_l": "1868-8071", "issn": ["1868-8071", "1868-808X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "International Journal of Machine Learning and Cybernetics", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 53, "referenced_works": ["https://openalex.org/W206679605", "https://openalex.org/W613314169", "https://openalex.org/W1511864092", "https://openalex.org/W1579240551", "https://openalex.org/W1626155273", "https://openalex.org/W1966086707", "https://openalex.org/W1977655452", "https://openalex.org/W1979638690", "https://openalex.org/W1980417427", "https://openalex.org/W2009563859", "https://openalex.org/W2012587148", "https://openalex.org/W2021004298", "https://openalex.org/W2042882799", "https://openalex.org/W2065253294", "https://openalex.org/W2101745110", "https://openalex.org/W2102623571", "https://openalex.org/W2112566751", "https://openalex.org/W2121863487", "https://openalex.org/W2134140787", "https://openalex.org/W2134159355", "https://openalex.org/W2140135625", "https://openalex.org/W2141543600", "https://openalex.org/W2152161277", "https://openalex.org/W2152445738", "https://openalex.org/W2165131254", "https://openalex.org/W2165150801", "https://openalex.org/W2166513231", "https://openalex.org/W2294330364", "https://openalex.org/W2490314460", "https://openalex.org/W2519894412", "https://openalex.org/W2736601468", "https://openalex.org/W2758033803", "https://openalex.org/W2773735557", "https://openalex.org/W2790154185", "https://openalex.org/W2791291025", "https://openalex.org/W2792718656", "https://openalex.org/W2792799644", "https://openalex.org/W2898563784", "https://openalex.org/W2943867327", "https://openalex.org/W2956146357", "https://openalex.org/W2963252619", "https://openalex.org/W2965916140", "https://openalex.org/W2966272253", "https://openalex.org/W2972280472", "https://openalex.org/W2982651739", "https://openalex.org/W3000371092", "https://openalex.org/W3007973969", "https://openalex.org/W3009594751", "https://openalex.org/W3024744364", "https://openalex.org/W3144155561", "https://openalex.org/W4205513846", "https://openalex.org/W4214717370", "https://openalex.org/W6814003322"], "related_works": ["https://openalex.org/W2115240519", "https://openalex.org/W2080642692", "https://openalex.org/W2039927751", "https://openalex.org/W2744818472", "https://openalex.org/W1253671258", "https://openalex.org/W2731862817", "https://openalex.org/W4283266117", "https://openalex.org/W2355512144", "https://openalex.org/W3134555460", "https://openalex.org/W2607470227"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 5}, {"year": 2025, "cited_by_count": 9}, {"year": 2024, "cited_by_count": 24}, {"year": 2023, "cited_by_count": 10}, {"year": 2022, "cited_by_count": 16}, {"year": 2021, "cited_by_count": 14}, {"year": 2020, "cited_by_count": 3}], "updated_date": "2026-06-19T15:47:20.252518", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W1525280637", "doi": "https://doi.org/10.1109/icra.2012.6225072", "title": "RTMBA: A Real-Time Model-Based Reinforcement Learning Architecture for robot control", "display_name": "RTMBA: A Real-Time Model-Based Reinforcement Learning Architecture for robot control", "relevance_score": 233.12651, "publication_year": 2012, "publication_date": "2012-05-01", "ids": {"openalex": "https://openalex.org/W1525280637", "doi": "https://doi.org/10.1109/icra.2012.6225072", "mag": "1525280637"}, "language": "en", "primary_location": {"id": "doi:10.1109/icra.2012.6225072", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2012.6225072", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2012 IEEE International Conference on Robotics and Automation", "raw_type": "proceedings-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5048229171", "display_name": "Todd Hester", "orcid": null}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Todd Hester", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5074285715", "display_name": "Michael Quinlan", "orcid": null}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Michael Quinlan", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5001594330", "display_name": "Peter Stone", "orcid": "https://orcid.org/0000-0002-6795-420X"}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Peter Stone", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": [], "corresponding_institution_ids": ["https://openalex.org/I86519309"], "apc_list": null, "apc_paid": null, "fwci": 8.8614, "has_fulltext": false, "cited_by_count": 86, "citation_normalized_percentile": {"value": 0.9764613, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 94, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": "85", "last_page": "90"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.8855999708175659, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.8855999708175659, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.02070000022649765, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.019999999552965164, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8828479051589966}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.810511589050293}, {"id": "https://openalex.org/keywords/architecture", "display_name": "Architecture", "score": 0.6434780359268188}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6390858888626099}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.592674195766449}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.5577608346939087}, {"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.5256758332252502}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49630218744277954}, {"id": "https://openalex.org/keywords/action", "display_name": "Action (physics)", "score": 0.4356013834476471}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.36971479654312134}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.3013526201248169}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8828479051589966}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.810511589050293}, {"id": "https://openalex.org/C123657996", "wikidata": "https://www.wikidata.org/wiki/Q12271", "display_name": "Architecture", "level": 2, "score": 0.6434780359268188}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6390858888626099}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.592674195766449}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.5577608346939087}, {"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.5256758332252502}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49630218744277954}, {"id": "https://openalex.org/C2780791683", "wikidata": "https://www.wikidata.org/wiki/Q846785", "display_name": "Action (physics)", "level": 2, "score": 0.4356013834476471}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.36971479654312134}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.3013526201248169}, {"id": "https://openalex.org/C153349607", "wikidata": "https://www.wikidata.org/wiki/Q36649", "display_name": "Visual arts", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C142362112", "wikidata": "https://www.wikidata.org/wiki/Q735", "display_name": "Art", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C62520636", "wikidata": "https://www.wikidata.org/wiki/Q944", "display_name": "Quantum mechanics", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.1109/icra.2012.6225072", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2012.6225072", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2012 IEEE International Conference on Robotics and Automation", "raw_type": "proceedings-article"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.3908", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.3908", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.utexas.edu/%7Epstone/Papers/bib2html-links/ICRA12-hester.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.765.4538", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.765.4538", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://arxiv.org/pdf/1105.1749.pdf", "raw_type": "text"}], "best_oa_location": null, "sustainable_development_goals": [{"display_name": "Peace, Justice and strong institutions", "id": "https://metadata.un.org/sdg/16", "score": 0.6499999761581421}], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 31, "referenced_works": ["https://openalex.org/W1491843047", "https://openalex.org/W1515308897", "https://openalex.org/W1560495008", "https://openalex.org/W1590157819", "https://openalex.org/W1595483645", "https://openalex.org/W1625390266", "https://openalex.org/W2097778153", "https://openalex.org/W2103048296", "https://openalex.org/W2108734173", "https://openalex.org/W2116695029", "https://openalex.org/W2121863487", "https://openalex.org/W2132622533", "https://openalex.org/W2132676037", "https://openalex.org/W2140135625", "https://openalex.org/W2153192722", "https://openalex.org/W2161966552", "https://openalex.org/W2901136733", "https://openalex.org/W3011120880", "https://openalex.org/W3020747880", "https://openalex.org/W3023151133", "https://openalex.org/W4214717370", "https://openalex.org/W4242606736", "https://openalex.org/W4285719527", "https://openalex.org/W6636578284", "https://openalex.org/W6675223484", "https://openalex.org/W6677128644", "https://openalex.org/W6679365036", "https://openalex.org/W6680657880", "https://openalex.org/W6682567211", "https://openalex.org/W6683603353", "https://openalex.org/W6756486208"], "related_works": ["https://openalex.org/W2122871747", "https://openalex.org/W3114279067", "https://openalex.org/W2930863966", "https://openalex.org/W2126211886", "https://openalex.org/W3153786280", "https://openalex.org/W3127551068", "https://openalex.org/W4220829754", "https://openalex.org/W2165180011", "https://openalex.org/W3165944253", "https://openalex.org/W2351343564"], "abstract_inverted_index": {"Reinforcement": [0], "Learning": [1], "(RL)": [2], "is": [3, 112], "a": [4, 73, 104], "paradigm": [5], "for": [6, 29, 64, 77, 115], "learning": [7], "decision-making": [8], "tasks": [9, 150], "that": [10, 80, 108, 122, 151], "could": [11], "enable": [12], "robots": [13], "to": [14, 18, 26], "learn": [15, 35, 50], "and": [16, 93, 100, 144], "adapt": [17], "their": [19], "situation": [20], "on-line.": [21], "For": [22], "an": [23, 158], "RL": [24, 48, 79], "algorithm": [25], "be": [27], "practical": [28, 65], "robotic": [30], "control": [31, 118], "tasks,": [32], "it": [33], "must": [34], "in": [36, 44, 51, 82, 103], "very": [37], "few": [38, 53], "samples,": [39, 54], "while": [40], "continually": [41], "taking": [42, 86], "actions": [43, 154], "real-time.": [45], "Existing": [46], "model-based": [47, 78], "methods": [49, 92, 132, 148], "relatively": [52], "but": [55], "typically": [56], "take": [57], "too": [58], "much": [59], "time": [60], "between": [61], "each": [62], "action": [63], "on-line": [66], "learning.": [67], "In": [68], "this": [69, 125], "paper,": [70], "we": [71], "present": [72], "novel": [74, 105], "parallel": [75], "architecture": [76, 126, 137], "runs": [81], "real-time": [83, 153], "by": [84], "1)": [85], "advantage": [87], "of": [88], "sample-based": [89], "approximate": [90], "planning": [91, 101], "2)": [94], "parallelizing": [95], "the": [96, 109, 134], "acting,": [97], "model": [98], "learning,": [99], "processes": [102], "way": [106], "such": [107, 155], "acting": [110], "process": [111], "sufficiently": [113], "fast": [114], "typical": [116, 135], "robot": [117], "cycles.": [119], "We": [120], "demonstrate": [121], "algorithms": [123], "using": [124, 133], "perform": [127], "nearly": [128], "as": [129, 131, 156], "well": [130], "sequential": [136], "when": [138], "both": [139], "are": [140], "given": [141], "unlimited": [142], "time,": [143], "greatly": [145], "out-perform": [146], "these": [147], "on": [149], "require": [152], "controlling": [157], "autonomous": [159], "vehicle.": [160]}, "counts_by_year": [{"year": 2025, "cited_by_count": 8}, {"year": 2023, "cited_by_count": 6}, {"year": 2022, "cited_by_count": 4}, {"year": 2021, "cited_by_count": 8}, {"year": 2020, "cited_by_count": 4}, {"year": 2019, "cited_by_count": 5}, {"year": 2018, "cited_by_count": 5}, {"year": 2017, "cited_by_count": 5}, {"year": 2016, "cited_by_count": 5}, {"year": 2015, "cited_by_count": 7}, {"year": 2014, "cited_by_count": 2}, {"year": 2013, "cited_by_count": 8}, {"year": 2012, "cited_by_count": 3}], "updated_date": "2026-06-26T08:34:08.712188", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W1520597402", "doi": "https://doi.org/10.15607/rss.2011.vii.008", "title": "Learning to Control a Low-Cost Manipulator using Data-Efficient Reinforcement Learning", "display_name": "Learning to Control a Low-Cost Manipulator using Data-Efficient Reinforcement Learning", "relevance_score": 222.52773, "publication_year": 2011, "publication_date": "2011-06-27", "ids": {"openalex": "https://openalex.org/W1520597402", "doi": "https://doi.org/10.15607/rss.2011.vii.008", "mag": "1520597402"}, "language": "en", "primary_location": {"id": "doi:10.15607/rss.2011.vii.008", "is_oa": true, "landing_page_url": "https://doi.org/10.15607/rss.2011.vii.008", "pdf_url": "https://doi.org/10.15607/rss.2011.vii.008", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Robotics: Science and Systems VII", "raw_type": "proceedings-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://doi.org/10.15607/rss.2011.vii.008", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5001763022", "display_name": "Marc Peter Deisenroth", "orcid": "https://orcid.org/0000-0003-1503-680X"}, "institutions": [{"id": "https://openalex.org/I201448701", "display_name": "University of Washington", "ror": "https://ror.org/00cvxb145", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I201448701"]}, {"id": "https://openalex.org/I241749", "display_name": "University of Cambridge", "ror": "https://ror.org/013meh722", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I241749"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Marc Deisenroth", "raw_affiliation_strings": ["University of Washington Seattle, WA, USA", "Dept. of Engineering University of Cambridge Cambridge, UK", "Dept. of Computer Science & Engineering"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Washington Seattle, WA, USA", "institution_ids": ["https://openalex.org/I201448701"]}, {"raw_affiliation_string": "Dept. of Engineering University of Cambridge Cambridge, UK", "institution_ids": ["https://openalex.org/I241749"]}, {"raw_affiliation_string": "Dept. of Computer Science & Engineering", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5108711177", "display_name": "Carl Edward Rasmussen", "orcid": null}, "institutions": [{"id": "https://openalex.org/I201448701", "display_name": "University of Washington", "ror": "https://ror.org/00cvxb145", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I201448701"]}, {"id": "https://openalex.org/I241749", "display_name": "University of Cambridge", "ror": "https://ror.org/013meh722", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I241749"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Carl Rasmussen", "raw_affiliation_strings": ["Dept. of Engineering University of Cambridge Cambridge, UK", "Dept. of Computer Science & Engineering", "University of Washington Seattle, WA, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Dept. of Engineering University of Cambridge Cambridge, UK", "institution_ids": ["https://openalex.org/I241749"]}, {"raw_affiliation_string": "Dept. of Computer Science & Engineering", "institution_ids": []}, {"raw_affiliation_string": "University of Washington Seattle, WA, USA", "institution_ids": ["https://openalex.org/I201448701"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5108257764", "display_name": "Dieter Fox", "orcid": "https://orcid.org/0009-0009-4694-9127"}, "institutions": [{"id": "https://openalex.org/I201448701", "display_name": "University of Washington", "ror": "https://ror.org/00cvxb145", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I201448701"]}, {"id": "https://openalex.org/I241749", "display_name": "University of Cambridge", "ror": "https://ror.org/013meh722", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I241749"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Dieter Fox", "raw_affiliation_strings": ["Dept. of Computer Science & Engineering", "Dept. of Engineering University of Cambridge Cambridge, UK", "University of Washington Seattle, WA, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Dept. of Computer Science & Engineering", "institution_ids": []}, {"raw_affiliation_string": "Dept. of Engineering University of Cambridge Cambridge, UK", "institution_ids": ["https://openalex.org/I241749"]}, {"raw_affiliation_string": "University of Washington Seattle, WA, USA", "institution_ids": ["https://openalex.org/I201448701"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 2, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 22.7991, "has_fulltext": true, "cited_by_count": 216, "citation_normalized_percentile": {"value": 0.99468036, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 95, "max": 100}, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.8486999869346619, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.8486999869346619, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.12060000002384186, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.0031999999191612005, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8589444160461426}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.737667977809906}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.5129877924919128}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5007965564727783}, {"id": "https://openalex.org/keywords/manipulator", "display_name": "Manipulator (device)", "score": 0.4781554639339447}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.3755597472190857}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.32502371072769165}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.23004716634750366}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.15846118330955505}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8589444160461426}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.737667977809906}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.5129877924919128}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5007965564727783}, {"id": "https://openalex.org/C2781347998", "wikidata": "https://www.wikidata.org/wiki/Q1587588", "display_name": "Manipulator (device)", "level": 3, "score": 0.4781554639339447}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.3755597472190857}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.32502371072769165}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.23004716634750366}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.15846118330955505}], "mesh": [], "locations_count": 7, "locations": [{"id": "doi:10.15607/rss.2011.vii.008", "is_oa": true, "landing_page_url": "https://doi.org/10.15607/rss.2011.vii.008", "pdf_url": "https://doi.org/10.15607/rss.2011.vii.008", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Robotics: Science and Systems VII", "raw_type": "proceedings-article"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.220.7598", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.220.7598", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.roboticsproceedings.org/rss07/p08.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.233.6687", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.233.6687", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.washington.edu/homes/marc/publications/rss2011_revision.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.394.2335", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.394.2335", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.ias.informatik.tu-darmstadt.de/uploads/Publications/Deisenroth_RSS_2011.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.422.9188", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.422.9188", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://eprints.pascal-network.org/archive/00008309/01/rss2011_revision.pdf", "raw_type": "text"}, {"id": "pmh:oai:generic.eprints.org:755261", "is_oa": false, "landing_page_url": "http://publications.eng.cam.ac.uk/755261/", "pdf_url": null, "source": {"id": "https://openalex.org/S4406922847", "display_name": "Cambridge University Engineering Department Publications Database", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "Conference or Workshop Item"}, {"id": "pmh:oai:spiral.imperial.ac.uk:10044/1/11578", "is_oa": false, "landing_page_url": "http://hdl.handle.net/10044/1/11578", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401396", "display_name": "Spiral (Imperial College London)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I47508984", "host_organization_name": "Imperial College London", "host_organization_lineage": ["https://openalex.org/I47508984"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "2011 Robotics: Science and Systems Conference", "raw_type": "Conference Paper"}], "best_oa_location": {"id": "doi:10.15607/rss.2011.vii.008", "is_oa": true, "landing_page_url": "https://doi.org/10.15607/rss.2011.vii.008", "pdf_url": "https://doi.org/10.15607/rss.2011.vii.008", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Robotics: Science and Systems VII", "raw_type": "proceedings-article"}, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G6875778477", "display_name": null, "funder_award_id": "N00014-09-1-1052", "funder_id": "https://openalex.org/F4320337345", "funder_display_name": "Office of Naval Research"}, {"id": "https://openalex.org/G8876996369", "display_name": null, "funder_award_id": "N00014", "funder_id": "https://openalex.org/F4320337345", "funder_display_name": "Office of Naval Research"}], "funders": [{"id": "https://openalex.org/F4320333591", "display_name": "Multidisciplinary University Research Initiative", "ror": null}, {"id": "https://openalex.org/F4320337345", "display_name": "Office of Naval Research", "ror": "https://ror.org/00rk2pe57"}], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W1520597402.pdf", "grobid_xml": "https://content.openalex.org/works/W1520597402.grobid-xml"}, "referenced_works_count": 31, "referenced_works": ["https://openalex.org/W1502922572", "https://openalex.org/W1532688806", "https://openalex.org/W1746819321", "https://openalex.org/W1827128995", "https://openalex.org/W1996625075", "https://openalex.org/W2007864935", "https://openalex.org/W2012392077", "https://openalex.org/W2089630413", "https://openalex.org/W2095342761", "https://openalex.org/W2127036288", "https://openalex.org/W2127107099", "https://openalex.org/W2130105540", "https://openalex.org/W2134469667", "https://openalex.org/W2139769245", "https://openalex.org/W2140135625", "https://openalex.org/W2146851580", "https://openalex.org/W2148112459", "https://openalex.org/W2151268438", "https://openalex.org/W2154032554", "https://openalex.org/W2162717641", "https://openalex.org/W2164032318", "https://openalex.org/W2179284380", "https://openalex.org/W2569188995", "https://openalex.org/W2613580378", "https://openalex.org/W2901136733", "https://openalex.org/W3037142887", "https://openalex.org/W4211049957", "https://openalex.org/W4229760642", "https://openalex.org/W4285526177", "https://openalex.org/W6682800787", "https://openalex.org/W6756486208"], "related_works": ["https://openalex.org/W4206669594", "https://openalex.org/W2961085424", "https://openalex.org/W2959276766", "https://openalex.org/W4295941380", "https://openalex.org/W260766989", "https://openalex.org/W3139193008", "https://openalex.org/W4306674287", "https://openalex.org/W3111983280", "https://openalex.org/W3164468573", "https://openalex.org/W4319083788"], "abstract_inverted_index": {"Over": [0], "the": [1, 91, 137, 142, 147, 151], "last": [2], "years,": [3], "there": [4], "has": [5], "been": [6], "substantial": [7], "progress": [8], "in": [9, 12, 70, 90, 121], "robust": [10], "manipulation": [11], "unstructured": [13], "environments.": [14], "The": [15], "long-term": [16, 126], "goal": [17], "of": [18, 74, 94, 132, 150], "our": [19], "work": [20, 92], "is": [21, 79, 109], "to": [22, 33], "get": [23], "away": [24], "from": [25], "precise,": [26], "but": [27], "very": [28], "expensive": [29], "robotic": [30, 60], "systems": [31, 40], "and": [32, 81, 115, 140], "develop": [34], "affordable,": [35], "potentially": [36], "imprecise,": [37], "self-adaptive": [38], "manipulator": [39, 78], "that": [41], "can": [42, 62], "interactively": [43], "perform": [44], "tasks": [45], "such": [46], "as": [47], "playing": [48], "with": [49, 117], "children.": [50], "In": [51], "this": [52], "paper,": [53], "we": [54, 99], "demonstrate": [55], "how": [56], "a": [57, 67, 72, 88, 95, 101, 122, 130], "low-cost": [58], "off-the-shelf": [59], "system": [61], "learn": [63], "closed-loop": [64], "policies": [65], "for": [66], "stacking": [68, 152], "task": [69], "only": [71], "handful": [73], "trials-from": [75], "scratch.": [76], "Our": [77, 106], "inaccurate": [80], "provides": [82], "no": [83], "pose": [84], "feedback.": [85], "For": [86], "learning": [87, 104, 107, 138, 143], "controller": [89], "space": [93], "Kinect-style": [96], "depth": [97], "camera,": [98], "use": [100], "model-based": [102], "reinforcement": [103], "technique.": [105], "method": [108], "data": [110], "efficient,": [111], "reduces": [112], "model": [113], "bias,": [114], "deals": [116], "several": [118], "noise": [119], "sources": [120], "principled": [123], "way": [124, 131], "during": [125], "planning.": [127], "We": [128], "present": [129], "incorporating": [133], "state-space": [134], "constraints": [135], "into": [136], "process": [139], "analyze": [141], "gain": [144], "by": [145], "exploiting": [146], "sequential": [148], "structure": [149], "task.": [153]}, "counts_by_year": [{"year": 2025, "cited_by_count": 2}, {"year": 2024, "cited_by_count": 3}, {"year": 2023, "cited_by_count": 12}, {"year": 2022, "cited_by_count": 9}, {"year": 2021, "cited_by_count": 23}, {"year": 2020, "cited_by_count": 31}, {"year": 2019, "cited_by_count": 32}, {"year": 2018, "cited_by_count": 26}, {"year": 2017, "cited_by_count": 13}, {"year": 2016, "cited_by_count": 18}, {"year": 2015, "cited_by_count": 12}, {"year": 2014, "cited_by_count": 11}, {"year": 2013, "cited_by_count": 15}, {"year": 2012, "cited_by_count": 6}], "updated_date": "2026-07-02T09:51:11.867554", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3016938688", "doi": "https://doi.org/10.1109/access.2020.2987820", "title": "Actor-Critic Deep Reinforcement Learning for Solving Job Shop Scheduling Problems", "display_name": "Actor-Critic Deep Reinforcement Learning for Solving Job Shop Scheduling Problems", "relevance_score": 215.9261, "publication_year": 2020, "publication_date": "2020-01-01", "ids": {"openalex": "https://openalex.org/W3016938688", "doi": "https://doi.org/10.1109/access.2020.2987820", "mag": "3016938688"}, "language": "en", "primary_location": {"id": "doi:10.1109/access.2020.2987820", "is_oa": true, "landing_page_url": "https://doi.org/10.1109/access.2020.2987820", "pdf_url": "https://ieeexplore.ieee.org/ielx7/6287639/8948470/09066984.pdf", "source": {"id": "https://openalex.org/S2485537415", "display_name": "IEEE Access", "issn_l": "2169-3536", "issn": ["2169-3536"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Access", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://ieeexplore.ieee.org/ielx7/6287639/8948470/09066984.pdf", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5031322893", "display_name": "Chien\u2010Liang Liu", "orcid": "https://orcid.org/0000-0002-2724-7199"}, "institutions": [{"id": "https://openalex.org/I148366613", "display_name": "National Yang Ming Chiao Tung University", "ror": "https://ror.org/00se2k293", "country_code": "TW", "type": "education", "lineage": ["https://openalex.org/I148366613"]}], "countries": ["TW"], "is_corresponding": false, "raw_author_name": "Chien-Liang Liu", "raw_affiliation_strings": ["Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan"], "raw_orcid": "https://orcid.org/0000-0002-2724-7199", "affiliations": [{"raw_affiliation_string": "Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan", "institution_ids": ["https://openalex.org/I148366613"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5034735972", "display_name": "Chuan-Chin Chang", "orcid": "https://orcid.org/0000-0001-5180-5181"}, "institutions": [{"id": "https://openalex.org/I148366613", "display_name": "National Yang Ming Chiao Tung University", "ror": "https://ror.org/00se2k293", "country_code": "TW", "type": "education", "lineage": ["https://openalex.org/I148366613"]}], "countries": ["TW"], "is_corresponding": false, "raw_author_name": "Chuan-Chin Chang", "raw_affiliation_strings": ["Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan"], "raw_orcid": "https://orcid.org/0000-0001-5180-5181", "affiliations": [{"raw_affiliation_string": "Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan", "institution_ids": ["https://openalex.org/I148366613"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5066575319", "display_name": "Chun-Jan Tseng", "orcid": "https://orcid.org/0000-0003-1611-4259"}, "institutions": [{"id": "https://openalex.org/I148366613", "display_name": "National Yang Ming Chiao Tung University", "ror": "https://ror.org/00se2k293", "country_code": "TW", "type": "education", "lineage": ["https://openalex.org/I148366613"]}], "countries": ["TW"], "is_corresponding": false, "raw_author_name": "Chun-Jan Tseng", "raw_affiliation_strings": ["Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan"], "raw_orcid": "https://orcid.org/0000-0003-1611-4259", "affiliations": [{"raw_affiliation_string": "Department of Industrial Engineering and Management, National Chiao Tung University, Hsinchu, Taiwan", "institution_ids": ["https://openalex.org/I148366613"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": [], "corresponding_institution_ids": ["https://openalex.org/I148366613"], "apc_list": {"value": 1850, "currency": "USD", "value_usd": 1850}, "apc_paid": {"value": 1850, "currency": "USD", "value_usd": 1850}, "fwci": 23.2539, "has_fulltext": true, "cited_by_count": 270, "citation_normalized_percentile": {"value": 0.9955463, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 97, "max": 100}, "biblio": {"volume": "8", "issue": null, "first_page": "71752", "last_page": "71762"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10551", "display_name": "Scheduling and Optimization Algorithms", "score": 0.9983000159263611, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10551", "display_name": "Scheduling and Optimization Algorithms", "score": 0.9983000159263611, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9927999973297119, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10603", "display_name": "Smart Grid Energy Management", "score": 0.9686999917030334, "subfield": {"id": "https://openalex.org/subfields/2208", "display_name": "Electrical and Electronic Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7410805225372314}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7279857993125916}, {"id": "https://openalex.org/keywords/job-shop-scheduling", "display_name": "Job shop scheduling", "score": 0.5487351417541504}, {"id": "https://openalex.org/keywords/scheduling", "display_name": "Scheduling (production processes)", "score": 0.47755202651023865}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.43182939291000366}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.24500158429145813}, {"id": "https://openalex.org/keywords/schedule", "display_name": "Schedule", "score": 0.08975398540496826}, {"id": "https://openalex.org/keywords/operating-system", "display_name": "Operating system", "score": 0.07418113946914673}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.07308503985404968}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7410805225372314}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7279857993125916}, {"id": "https://openalex.org/C55416958", "wikidata": "https://www.wikidata.org/wiki/Q6206757", "display_name": "Job shop scheduling", "level": 3, "score": 0.5487351417541504}, {"id": "https://openalex.org/C206729178", "wikidata": "https://www.wikidata.org/wiki/Q2271896", "display_name": "Scheduling (production processes)", "level": 2, "score": 0.47755202651023865}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.43182939291000366}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.24500158429145813}, {"id": "https://openalex.org/C68387754", "wikidata": "https://www.wikidata.org/wiki/Q7271585", "display_name": "Schedule", "level": 2, "score": 0.08975398540496826}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.07418113946914673}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.07308503985404968}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1109/access.2020.2987820", "is_oa": true, "landing_page_url": "https://doi.org/10.1109/access.2020.2987820", "pdf_url": "https://ieeexplore.ieee.org/ielx7/6287639/8948470/09066984.pdf", "source": {"id": "https://openalex.org/S2485537415", "display_name": "IEEE Access", "issn_l": "2169-3536", "issn": ["2169-3536"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Access", "raw_type": "journal-article"}, {"id": "pmh:oai:doaj.org/article:277036047ff34d28a6ed264651b182e6", "is_oa": true, "landing_page_url": "https://doaj.org/article/277036047ff34d28a6ed264651b182e6", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "IEEE Access, Vol 8, Pp 71752-71762 (2020)", "raw_type": "article"}], "best_oa_location": {"id": "doi:10.1109/access.2020.2987820", "is_oa": true, "landing_page_url": "https://doi.org/10.1109/access.2020.2987820", "pdf_url": "https://ieeexplore.ieee.org/ielx7/6287639/8948470/09066984.pdf", "source": {"id": "https://openalex.org/S2485537415", "display_name": "IEEE Access", "issn_l": "2169-3536", "issn": ["2169-3536"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Access", "raw_type": "journal-article"}, "sustainable_development_goals": [{"display_name": "Peace, Justice and strong institutions", "id": "https://metadata.un.org/sdg/16", "score": 0.5299999713897705}], "awards": [{"id": "https://openalex.org/G3695215382", "display_name": null, "funder_award_id": "MOST 107-2221-E-009-109-MY2", "funder_id": "https://openalex.org/F4320322795", "funder_display_name": "Ministry of Science and Technology, Taiwan"}], "funders": [{"id": "https://openalex.org/F4320322795", "display_name": "Ministry of Science and Technology, Taiwan", "ror": "https://ror.org/02kv4zf79"}], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 55, "referenced_works": ["https://openalex.org/W32403112", "https://openalex.org/W1484740474", "https://openalex.org/W1522301498", "https://openalex.org/W1551243188", "https://openalex.org/W2010890795", "https://openalex.org/W2011649121", "https://openalex.org/W2028145591", "https://openalex.org/W2049967755", "https://openalex.org/W2071306171", "https://openalex.org/W2078204340", "https://openalex.org/W2119567691", "https://openalex.org/W2121863487", "https://openalex.org/W2122967269", "https://openalex.org/W2131542691", "https://openalex.org/W2145339207", "https://openalex.org/W2156737235", "https://openalex.org/W2157846217", "https://openalex.org/W2166928920", "https://openalex.org/W2168079536", "https://openalex.org/W2169755132", "https://openalex.org/W2257979135", "https://openalex.org/W2532427336", "https://openalex.org/W2568019971", "https://openalex.org/W2571136349", "https://openalex.org/W2596367596", "https://openalex.org/W2735051146", "https://openalex.org/W2737844259", "https://openalex.org/W2747092504", "https://openalex.org/W2760509803", "https://openalex.org/W2766447205", "https://openalex.org/W2774422817", "https://openalex.org/W2802314172", "https://openalex.org/W2807766588", "https://openalex.org/W2924645442", "https://openalex.org/W2962979969", "https://openalex.org/W2963864421", "https://openalex.org/W2964043796", "https://openalex.org/W2964536243", "https://openalex.org/W2971663057", "https://openalex.org/W2972148356", "https://openalex.org/W2982316857", "https://openalex.org/W3036035835", "https://openalex.org/W3134587112", "https://openalex.org/W4214717370", "https://openalex.org/W4252732074", "https://openalex.org/W4295138992", "https://openalex.org/W4298023569", "https://openalex.org/W6628916341", "https://openalex.org/W6631190155", "https://openalex.org/W6683195989", "https://openalex.org/W6684921986", "https://openalex.org/W6692846177", "https://openalex.org/W6735641298", "https://openalex.org/W6748487558", "https://openalex.org/W6779748562"], "related_works": ["https://openalex.org/W4391375266", "https://openalex.org/W2899084033", "https://openalex.org/W2748952813", "https://openalex.org/W3172150420", "https://openalex.org/W2107725657", "https://openalex.org/W2390279801", "https://openalex.org/W4306904969", "https://openalex.org/W4391913857", "https://openalex.org/W2358668433", "https://openalex.org/W3204654320"], "abstract_inverted_index": {"In": [0], "the": [1, 20, 27, 36, 40, 63, 111, 115, 154, 183], "past": [2], "decades,": [3], "many": [4, 47], "optimization": [5], "methods": [6, 24], "have": [7], "been": [8], "devised": [9], "and": [10, 54, 77, 94, 106, 127, 134, 197, 242, 249, 267], "applied": [11, 31], "to": [12, 18, 32, 60, 79, 84, 143, 160, 181], "job": [13, 65], "shop": [14], "scheduling": [15, 28], "problem": [16, 76, 223], "(JSSP)": [17], "find": [19], "optimal": [21], "solution.": [22], "Many": [23], "assumed": [25], "that": [26, 110, 216, 232], "results": [29, 230], "were": [30], "static": [33, 238, 263], "environments,": [34], "but": [35], "whole": [37, 116, 186], "environments": [38], "in": [39, 102, 145, 219, 237, 252, 262, 269], "real": [41], "world": [42], "are": [43, 202, 217], "always": [44], "dynamic.": [45], "Moreover,": [46], "unexpected": [48], "events": [49], "such": [50], "as": [51, 71, 100, 173, 175, 204], "machine": [52], "breakdowns": [53], "material": [55], "problems": [56], "may": [57], "be": [58], "present": [59, 218], "adversely": [61], "affect": [62], "initial": [64], "scheduling.": [66], "This": [67, 163], "work": [68, 164], "views": [69], "JSSP": [70, 239, 264], "a": [72, 166, 194, 220, 244], "sequential": [73], "decision": [74], "making": [75], "proposes": [78, 165], "use": [80], "deep": [81, 92, 176], "reinforcement": [82, 95, 104], "learning": [83, 93, 96, 117], "cope": [85], "with": [86, 190], "this": [87], "problem.": [88], "The": [89, 185, 228], "combination": [90, 112], "of": [91, 156, 257], "avoids": [97], "handcraft": [98], "features": [99], "used": [101], "traditional": [103], "learning,": [105], "it": [107], "is": [108, 188, 235, 260], "expected": [109], "will": [113], "make": [114], "phase": [118], "more": [119, 212], "efficient.": [120], "Our": [121], "proposed": [122, 209], "model": [123, 210], "comprises": [124], "actor": [125, 161], "network": [126, 139, 150, 187], "critic": [128, 149], "network,": [129], "both": [130], "including": [131], "convolution": [132], "layers": [133], "fully": [135], "connected": [136], "layer.": [137], "Actor": [138], "agent": [140, 152], "learns": [141], "how": [142], "behave": [144], "different": [146, 198], "situations,": [147], "while": [148], "helps": [151], "evaluate": [153, 207], "value": [155], "statement": [157], "then": [158], "return": [159], "network.": [162], "parallel": [167, 191], "training": [168, 192], "method,": [169], "combining": [170], "asynchronous": [171], "update": [172], "well": [174], "deterministic": [177], "policy": [178], "gradient": [179], "(DDPG),": [180], "train": [182], "model.": [184], "trained": [189], "on": [193, 211], "multi-agent": [195], "environment": [196], "simple": [199], "dispatching": [200], "rules": [201], "considered": [203], "actions.": [205], "We": [206], "our": [208, 233, 258], "than": [213], "ten": [214], "instances": [215], "famous": [221], "benchmark": [222, 240, 265], "library": [224], "-": [225], "OR": [226], "library.": [227], "evaluation": [229], "indicate": [231], "method": [234, 259], "comparative": [236], "problems,": [241, 266], "achieves": [243], "good": [245], "balance": [246], "between": [247], "makespan": [248], "execution": [250], "time": [251], "dynamic": [253, 270], "environments.": [254, 271], "Scheduling": [255], "score": [256], "91.12%": [261], "80.78%": [268]}, "counts_by_year": [{"year": 2026, "cited_by_count": 17}, {"year": 2025, "cited_by_count": 59}, {"year": 2024, "cited_by_count": 64}, {"year": 2023, "cited_by_count": 57}, {"year": 2022, "cited_by_count": 44}, {"year": 2021, "cited_by_count": 25}, {"year": 2020, "cited_by_count": 4}], "updated_date": "2026-07-01T08:55:40.977307", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2877093712", "doi": "https://doi.org/10.24963/ijcai.2018/820", "title": "Towards Sample Efficient Reinforcement Learning", "display_name": "Towards Sample Efficient Reinforcement Learning", "relevance_score": 207.4553, "publication_year": 2018, "publication_date": "2018-07-01", "ids": {"openalex": "https://openalex.org/W2877093712", "doi": "https://doi.org/10.24963/ijcai.2018/820", "mag": "2877093712"}, "language": "en", "primary_location": {"id": "doi:10.24963/ijcai.2018/820", "is_oa": true, "landing_page_url": "https://doi.org/10.24963/ijcai.2018/820", "pdf_url": "https://www.ijcai.org/proceedings/2018/0820.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence", "raw_type": "proceedings-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://www.ijcai.org/proceedings/2018/0820.pdf", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5100342263", "display_name": "Yang Yu", "orcid": "https://orcid.org/0000-0002-1732-9545"}, "institutions": [{"id": "https://openalex.org/I881766915", "display_name": "Nanjing University", "ror": "https://ror.org/01rxvg760", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I881766915"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Yang Yu", "raw_affiliation_strings": ["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China", "institution_ids": ["https://openalex.org/I881766915"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5100342263"], "corresponding_institution_ids": ["https://openalex.org/I881766915"], "apc_list": null, "apc_paid": null, "fwci": 6.1384, "has_fulltext": true, "cited_by_count": 146, "citation_normalized_percentile": {"value": 0.97035481, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": null, "issue": null, "first_page": "5739", "last_page": "5743"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12101", "display_name": "Advanced Bandit Algorithms Research", "score": 0.9908000230789185, "subfield": {"id": "https://openalex.org/subfields/1803", "display_name": "Management Science and Operations Research"}, "field": {"id": "https://openalex.org/fields/18", "display_name": "Decision Sciences"}, "domain": {"id": "https://openalex.org/domains/2", "display_name": "Social Sciences"}}, {"id": "https://openalex.org/T11689", "display_name": "Adversarial Robustness in Machine Learning", "score": 0.9894999861717224, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.9391586780548096}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7789284586906433}, {"id": "https://openalex.org/keywords/abstraction", "display_name": "Abstraction", "score": 0.6499032974243164}, {"id": "https://openalex.org/keywords/sample-complexity", "display_name": "Sample complexity", "score": 0.6225146651268005}, {"id": "https://openalex.org/keywords/sample", "display_name": "Sample (material)", "score": 0.6132881045341492}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5387426614761353}, {"id": "https://openalex.org/keywords/reinforcement", "display_name": "Reinforcement", "score": 0.5254805684089661}, {"id": "https://openalex.org/keywords/human\u2013computer-interaction", "display_name": "Human\u2013computer interaction", "score": 0.44434884190559387}, {"id": "https://openalex.org/keywords/transfer-of-learning", "display_name": "Transfer of learning", "score": 0.42425358295440674}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.38415876030921936}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.12707185745239258}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.9391586780548096}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7789284586906433}, {"id": "https://openalex.org/C124304363", "wikidata": "https://www.wikidata.org/wiki/Q673661", "display_name": "Abstraction", "level": 2, "score": 0.6499032974243164}, {"id": "https://openalex.org/C2778445095", "wikidata": "https://www.wikidata.org/wiki/Q18354077", "display_name": "Sample complexity", "level": 2, "score": 0.6225146651268005}, {"id": "https://openalex.org/C198531522", "wikidata": "https://www.wikidata.org/wiki/Q485146", "display_name": "Sample (material)", "level": 2, "score": 0.6132881045341492}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5387426614761353}, {"id": "https://openalex.org/C67203356", "wikidata": "https://www.wikidata.org/wiki/Q1321905", "display_name": "Reinforcement", "level": 2, "score": 0.5254805684089661}, {"id": "https://openalex.org/C107457646", "wikidata": "https://www.wikidata.org/wiki/Q207434", "display_name": "Human\u2013computer interaction", "level": 1, "score": 0.44434884190559387}, {"id": "https://openalex.org/C150899416", "wikidata": "https://www.wikidata.org/wiki/Q1820378", "display_name": "Transfer of learning", "level": 2, "score": 0.42425358295440674}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.38415876030921936}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.12707185745239258}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C111472728", "wikidata": "https://www.wikidata.org/wiki/Q9471", "display_name": "Epistemology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C43617362", "wikidata": "https://www.wikidata.org/wiki/Q170050", "display_name": "Chromatography", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C185592680", "wikidata": "https://www.wikidata.org/wiki/Q2329", "display_name": "Chemistry", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C66938386", "wikidata": "https://www.wikidata.org/wiki/Q633538", "display_name": "Structural engineering", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.24963/ijcai.2018/820", "is_oa": true, "landing_page_url": "https://doi.org/10.24963/ijcai.2018/820", "pdf_url": "https://www.ijcai.org/proceedings/2018/0820.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence", "raw_type": "proceedings-article"}], "best_oa_location": {"id": "doi:10.24963/ijcai.2018/820", "is_oa": true, "landing_page_url": "https://doi.org/10.24963/ijcai.2018/820", "pdf_url": "https://www.ijcai.org/proceedings/2018/0820.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence", "raw_type": "proceedings-article"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": true, "pdf": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2877093712.pdf", "grobid_xml": "https://content.openalex.org/works/W2877093712.grobid-xml"}, "referenced_works_count": 47, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W1573527757", "https://openalex.org/W1576660662", "https://openalex.org/W1771410628", "https://openalex.org/W2004030284", "https://openalex.org/W2109910161", "https://openalex.org/W2131241448", "https://openalex.org/W2139612737", "https://openalex.org/W2145339207", "https://openalex.org/W2158548602", "https://openalex.org/W2160371091", "https://openalex.org/W2174817438", "https://openalex.org/W2257979135", "https://openalex.org/W2342662072", "https://openalex.org/W2530887700", "https://openalex.org/W2573774999", "https://openalex.org/W2590513847", "https://openalex.org/W2594829461", "https://openalex.org/W2604763608", "https://openalex.org/W2604877292", "https://openalex.org/W2606433045", "https://openalex.org/W2614839826", "https://openalex.org/W2623491082", "https://openalex.org/W2724169821", "https://openalex.org/W2770392471", "https://openalex.org/W2785538451", "https://openalex.org/W2787757704", "https://openalex.org/W2788171925", "https://openalex.org/W2789543555", "https://openalex.org/W2790532312", "https://openalex.org/W2808577099", "https://openalex.org/W2809162153", "https://openalex.org/W2949267040", "https://openalex.org/W2962924847", "https://openalex.org/W2963523627", "https://openalex.org/W2963654596", "https://openalex.org/W2963687836", "https://openalex.org/W2964036701", "https://openalex.org/W2964227312", "https://openalex.org/W3021208093", "https://openalex.org/W3101442004", "https://openalex.org/W4214717370", "https://openalex.org/W4293396018", "https://openalex.org/W4293713786", "https://openalex.org/W4294175339", "https://openalex.org/W4394662461", "https://openalex.org/W4394672593"], "related_works": ["https://openalex.org/W4310083477", "https://openalex.org/W2328553770", "https://openalex.org/W2920061524", "https://openalex.org/W1977959518", "https://openalex.org/W2038908348", "https://openalex.org/W2877093712", "https://openalex.org/W2116157560", "https://openalex.org/W171213352", "https://openalex.org/W4310614650", "https://openalex.org/W4386738330"], "abstract_inverted_index": {"Reinforcement": [0], "learning": [1, 22, 39], "is": [2], "a": [3, 46], "major": [4], "tool": [5], "to": [6, 15, 75], "realize": [7], "intelligent": [8], "agents": [9], "that": [10], "can": [11], "be": [12], "autonomously": [13], "adaptive": [14], "the": [16, 69, 77, 84, 104], "environment.": [17], "With": [18], "deep": [19], "models,": [20], "reinforcement": [21, 38, 81], "has": [23], "shown": [24], "great": [25], "potential": [26], "in": [27, 55, 58, 100], "complex": [28], "tasks": [29], "such": [30], "as": [31], "playing": [32], "games": [33], "from": [34, 44, 83], "pixels.": [35], "However,": [36], "current": [37], "techniques": [40], "are": [41], "still": [42], "suffer": [43], "requiring": [45], "huge": [47], "amount": [48], "of": [49, 68, 80, 86, 106], "interaction": [50], "data,": [51], "which": [52], "could": [53], "result": [54], "unbearable": [56], "cost": [57, 79], "real-world": [59, 101], "applications.": [60], "In": [61], "this": [62], "article,": [63], "we": [64], "share": [65], "our": [66], "understanding": [67], "problem,": [70], "and": [71, 93], "discuss": [72, 97], "possible": [73], "ways": [74], "alleviate": [76], "sample": [78], "learning,": [82], "aspects": [85], "exploration,": [87], "optimization,": [88], "environment": [89], "modeling,": [90], "experience": [91], "transfer,": [92], "abstraction.": [94], "We": [95], "also": [96], "some": [98], "challenges": [99], "applications,": [102], "with": [103], "hope": [105], "inspiring": [107], "future": [108], "researches.": [109]}, "counts_by_year": [{"year": 2026, "cited_by_count": 2}, {"year": 2025, "cited_by_count": 28}, {"year": 2024, "cited_by_count": 32}, {"year": 2023, "cited_by_count": 22}, {"year": 2022, "cited_by_count": 25}, {"year": 2021, "cited_by_count": 18}, {"year": 2020, "cited_by_count": 9}, {"year": 2019, "cited_by_count": 9}, {"year": 2012, "cited_by_count": 1}], "updated_date": "2026-06-30T13:55:48.251075", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2742169147", "doi": "https://doi.org/10.1007/978-3-319-65289-4_17", "title": "Toward Effective Soft Robot Control via Reinforcement Learning", "display_name": "Toward Effective Soft Robot Control via Reinforcement Learning", "relevance_score": 197.0856, "publication_year": 2017, "publication_date": "2017-01-01", "ids": {"openalex": "https://openalex.org/W2742169147", "doi": "https://doi.org/10.1007/978-3-319-65289-4_17", "mag": "2742169147"}, "language": "en", "primary_location": {"id": "doi:10.1007/978-3-319-65289-4_17", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/978-3-319-65289-4_17", "pdf_url": null, "source": {"id": "https://openalex.org/S106296714", "display_name": "Lecture notes in computer science", "issn_l": "0302-9743", "issn": ["0302-9743", "1611-3349"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "book series"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Lecture Notes in Computer Science", "raw_type": "book-chapter"}, "type": "book-chapter", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5018060361", "display_name": "Haochong Zhang", "orcid": null}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Haochong Zhang", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5009602756", "display_name": "Rongyun Cao", "orcid": null}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Rongyun Cao", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5027224308", "display_name": "Shlomo Zilberstein", "orcid": "https://orcid.org/0000-0001-9817-7848"}, "institutions": [{"id": "https://openalex.org/I24603500", "display_name": "University of Massachusetts Amherst", "ror": "https://ror.org/0072zz521", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I24603500"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Shlomo Zilberstein", "raw_affiliation_strings": ["University of Massachusetts Amherst, Amherst, MA, 01003-9264, USA"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Massachusetts Amherst, Amherst, MA, 01003-9264, USA", "institution_ids": ["https://openalex.org/I24603500"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5029102289", "display_name": "Feng Wu", "orcid": "https://orcid.org/0000-0003-3989-0509"}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Feng Wu", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5084710341", "display_name": "Xiaoping Chen", "orcid": "https://orcid.org/0000-0001-8992-9286"}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Xiaoping Chen", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 5, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": {"value": 5000, "currency": "EUR", "value_usd": 5392}, "apc_paid": null, "fwci": 8.2274, "has_fulltext": false, "cited_by_count": 48, "citation_normalized_percentile": {"value": 0.98322851, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 94, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": "173", "last_page": "184"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11486", "display_name": "Micro and Nano Robotics", "score": 0.9959999918937683, "subfield": {"id": "https://openalex.org/subfields/3104", "display_name": "Condensed Matter Physics"}, "field": {"id": "https://openalex.org/fields/31", "display_name": "Physics and Astronomy"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12784", "display_name": "Modular Robots and Swarm Intelligence", "score": 0.9865000247955322, "subfield": {"id": "https://openalex.org/subfields/2210", "display_name": "Mechanical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8420349359512329}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.8071468472480774}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7579704523086548}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.5487931966781616}, {"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.5265448689460754}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5240036249160767}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49410489201545715}, {"id": "https://openalex.org/keywords/key", "display_name": "Key (lock)", "score": 0.48107075691223145}, {"id": "https://openalex.org/keywords/soft-robotics", "display_name": "Soft robotics", "score": 0.47242581844329834}, {"id": "https://openalex.org/keywords/robot-control", "display_name": "Robot control", "score": 0.4267944395542145}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.4089146554470062}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.2290496826171875}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.11558309197425842}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8420349359512329}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.8071468472480774}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7579704523086548}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.5487931966781616}, {"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.5265448689460754}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5240036249160767}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49410489201545715}, {"id": "https://openalex.org/C26517878", "wikidata": "https://www.wikidata.org/wiki/Q228039", "display_name": "Key (lock)", "level": 2, "score": 0.48107075691223145}, {"id": "https://openalex.org/C2776058767", "wikidata": "https://www.wikidata.org/wiki/Q24327151", "display_name": "Soft robotics", "level": 3, "score": 0.47242581844329834}, {"id": "https://openalex.org/C65401140", "wikidata": "https://www.wikidata.org/wiki/Q7353385", "display_name": "Robot control", "level": 4, "score": 0.4267944395542145}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.4089146554470062}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.2290496826171875}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.11558309197425842}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C38652104", "wikidata": "https://www.wikidata.org/wiki/Q3510521", "display_name": "Computer security", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/978-3-319-65289-4_17", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/978-3-319-65289-4_17", "pdf_url": null, "source": {"id": "https://openalex.org/S106296714", "display_name": "Lecture notes in computer science", "issn_l": "0302-9743", "issn": ["0302-9743", "1611-3349"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "book series"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Lecture Notes in Computer Science", "raw_type": "book-chapter"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 30, "referenced_works": ["https://openalex.org/W199177729", "https://openalex.org/W640042938", "https://openalex.org/W1227556793", "https://openalex.org/W1515749184", "https://openalex.org/W1521258934", "https://openalex.org/W1594979435", "https://openalex.org/W1646707810", "https://openalex.org/W1730458554", "https://openalex.org/W1921016406", "https://openalex.org/W1969107377", "https://openalex.org/W2027454283", "https://openalex.org/W2063507564", "https://openalex.org/W2063797945", "https://openalex.org/W2073677587", "https://openalex.org/W2097770951", "https://openalex.org/W2101667962", "https://openalex.org/W2105271646", "https://openalex.org/W2124540383", "https://openalex.org/W2141905643", "https://openalex.org/W2164102968", "https://openalex.org/W2230336266", "https://openalex.org/W2346769424", "https://openalex.org/W2394644742", "https://openalex.org/W2400243651", "https://openalex.org/W2560730116", "https://openalex.org/W2742015494", "https://openalex.org/W2965916140", "https://openalex.org/W3137141854", "https://openalex.org/W4211089519", "https://openalex.org/W4244367163"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W3084456289", "https://openalex.org/W2024136090", "https://openalex.org/W2171912896", "https://openalex.org/W2542723153", "https://openalex.org/W2021541810", "https://openalex.org/W2133150803"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2025, "cited_by_count": 5}, {"year": 2024, "cited_by_count": 8}, {"year": 2023, "cited_by_count": 4}, {"year": 2022, "cited_by_count": 13}, {"year": 2021, "cited_by_count": 6}, {"year": 2020, "cited_by_count": 5}, {"year": 2019, "cited_by_count": 3}, {"year": 2018, "cited_by_count": 2}, {"year": 2017, "cited_by_count": 2}], "updated_date": "2026-06-11T09:08:48.828518", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4366680618", "doi": "https://doi.org/10.34133/icomputing.0025", "title": "Evolutionary Reinforcement Learning: A Survey", "display_name": "Evolutionary Reinforcement Learning: A Survey", "relevance_score": 196.66873, "publication_year": 2023, "publication_date": "2023-01-01", "ids": {"openalex": "https://openalex.org/W4366680618", "doi": "https://doi.org/10.34133/icomputing.0025"}, "language": "en", "primary_location": {"id": "doi:10.34133/icomputing.0025", "is_oa": true, "landing_page_url": "https://doi.org/10.34133/icomputing.0025", "pdf_url": null, "source": {"id": "https://openalex.org/S4387281904", "display_name": "Intelligent Computing", "issn_l": "2771-5892", "issn": ["2771-5892"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Intelligent Computing", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "diamond", "oa_url": "https://doi.org/10.34133/icomputing.0025", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5103244724", "display_name": "Hui Bai", "orcid": "https://orcid.org/0000-0001-8947-8913"}, "institutions": [{"id": "https://openalex.org/I3045169105", "display_name": "Southern University of Science and Technology", "ror": "https://ror.org/049tv2d57", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I3045169105"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Hui Bai", "raw_affiliation_strings": ["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China", "institution_ids": ["https://openalex.org/I3045169105"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5004036087", "display_name": "Ran Cheng", "orcid": "https://orcid.org/0000-0001-9410-8263"}, "institutions": [{"id": "https://openalex.org/I3045169105", "display_name": "Southern University of Science and Technology", "ror": "https://ror.org/049tv2d57", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I3045169105"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Ran Cheng", "raw_affiliation_strings": ["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China", "institution_ids": ["https://openalex.org/I3045169105"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5032314861", "display_name": "Yaochu Jin", "orcid": "https://orcid.org/0000-0003-1100-0631"}, "institutions": [{"id": "https://openalex.org/I20121455", "display_name": "Bielefeld University", "ror": "https://ror.org/02hpadn98", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I20121455"]}, {"id": "https://openalex.org/I28290843", "display_name": "University of Surrey", "ror": "https://ror.org/00ks66431", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I28290843"]}], "countries": ["DE", "GB"], "is_corresponding": false, "raw_author_name": "Yaochu Jin", "raw_affiliation_strings": ["Department of Computer Science, University of Surrey, Guildford, Surrey GU2 7XH, UK", "Faculty of Technology, Bielefeld University, 33615 Bielefeld, Germany"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Surrey, Guildford, Surrey GU2 7XH, UK", "institution_ids": ["https://openalex.org/I28290843"]}, {"raw_affiliation_string": "Faculty of Technology, Bielefeld University, 33615 Bielefeld, Germany", "institution_ids": ["https://openalex.org/I20121455"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 3, "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 12.62, "has_fulltext": false, "cited_by_count": 79, "citation_normalized_percentile": {"value": 0.99033284, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 97, "max": 100}, "biblio": {"volume": "2", "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9988999962806702, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9988999962806702, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.998199999332428, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10848", "display_name": "Advanced Multi-Objective Optimization Algorithms", "score": 0.9945999979972839, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8484776616096497}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6870969533920288}, {"id": "https://openalex.org/keywords/scalability", "display_name": "Scalability", "score": 0.5257676243782043}, {"id": "https://openalex.org/keywords/hyperparameter", "display_name": "Hyperparameter", "score": 0.5211687088012695}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5168502926826477}, {"id": "https://openalex.org/keywords/population", "display_name": "Population", "score": 0.49315088987350464}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.48711729049682617}, {"id": "https://openalex.org/keywords/field", "display_name": "Field (mathematics)", "score": 0.44586268067359924}, {"id": "https://openalex.org/keywords/data-science", "display_name": "Data science", "score": 0.39155113697052}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8484776616096497}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6870969533920288}, {"id": "https://openalex.org/C48044578", "wikidata": "https://www.wikidata.org/wiki/Q727490", "display_name": "Scalability", "level": 2, "score": 0.5257676243782043}, {"id": "https://openalex.org/C8642999", "wikidata": "https://www.wikidata.org/wiki/Q4171168", "display_name": "Hyperparameter", "level": 2, "score": 0.5211687088012695}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5168502926826477}, {"id": "https://openalex.org/C2908647359", "wikidata": "https://www.wikidata.org/wiki/Q2625603", "display_name": "Population", "level": 2, "score": 0.49315088987350464}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.48711729049682617}, {"id": "https://openalex.org/C9652623", "wikidata": "https://www.wikidata.org/wiki/Q190109", "display_name": "Field (mathematics)", "level": 2, "score": 0.44586268067359924}, {"id": "https://openalex.org/C2522767166", "wikidata": "https://www.wikidata.org/wiki/Q2374463", "display_name": "Data science", "level": 1, "score": 0.39155113697052}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C77088390", "wikidata": "https://www.wikidata.org/wiki/Q8513", "display_name": "Database", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C144024400", "wikidata": "https://www.wikidata.org/wiki/Q21201", "display_name": "Sociology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C149923435", "wikidata": "https://www.wikidata.org/wiki/Q37732", "display_name": "Demography", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.34133/icomputing.0025", "is_oa": true, "landing_page_url": "https://doi.org/10.34133/icomputing.0025", "pdf_url": null, "source": {"id": "https://openalex.org/S4387281904", "display_name": "Intelligent Computing", "issn_l": "2771-5892", "issn": ["2771-5892"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Intelligent Computing", "raw_type": "journal-article"}, {"id": "pmh:oai:doaj.org/article:4f120cb09b144fd2b84cdf86eca4658a", "is_oa": true, "landing_page_url": "https://doaj.org/article/4f120cb09b144fd2b84cdf86eca4658a", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Intelligent Computing, Vol 2 (2023)", "raw_type": "article"}], "best_oa_location": {"id": "doi:10.34133/icomputing.0025", "is_oa": true, "landing_page_url": "https://doi.org/10.34133/icomputing.0025", "pdf_url": null, "source": {"id": "https://openalex.org/S4387281904", "display_name": "Intelligent Computing", "issn_l": "2771-5892", "issn": ["2771-5892"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Intelligent Computing", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"grobid_xml": false, "pdf": false}, "content_urls": null, "referenced_works_count": 206, "referenced_works": ["https://openalex.org/W288089086", "https://openalex.org/W1496915407", "https://openalex.org/W1546079235", "https://openalex.org/W1547136369", "https://openalex.org/W1547737196", "https://openalex.org/W1560435054", "https://openalex.org/W1579738444", "https://openalex.org/W1595159159", "https://openalex.org/W1646035949", "https://openalex.org/W1674110665", "https://openalex.org/W1738827650", "https://openalex.org/W1774344329", "https://openalex.org/W1783519346", "https://openalex.org/W1915938087", "https://openalex.org/W1974142402", "https://openalex.org/W1977212008", "https://openalex.org/W1983770999", "https://openalex.org/W1987725948", "https://openalex.org/W1994492573", "https://openalex.org/W1995972800", "https://openalex.org/W2002305926", "https://openalex.org/W2004883573", "https://openalex.org/W2020399841", "https://openalex.org/W2035737909", "https://openalex.org/W2037380487", "https://openalex.org/W2038794597", "https://openalex.org/W2040622444", "https://openalex.org/W2047094503", "https://openalex.org/W2049287437", "https://openalex.org/W2059654640", "https://openalex.org/W2060846151", "https://openalex.org/W2066251678", "https://openalex.org/W2085951801", "https://openalex.org/W2097998348", "https://openalex.org/W2098907614", "https://openalex.org/W2099397840", "https://openalex.org/W2099746672", "https://openalex.org/W2100211715", "https://openalex.org/W2100955720", "https://openalex.org/W2101525630", "https://openalex.org/W2102660061", "https://openalex.org/W2104333095", "https://openalex.org/W2107770641", "https://openalex.org/W2110972482", "https://openalex.org/W2111935653", "https://openalex.org/W2116339921", "https://openalex.org/W2116770962", "https://openalex.org/W2119814172", "https://openalex.org/W2120181489", "https://openalex.org/W2121365620", "https://openalex.org/W2123408238", "https://openalex.org/W2126105956", "https://openalex.org/W2126351339", "https://openalex.org/W2127474646", "https://openalex.org/W2129272928", "https://openalex.org/W2129578597", "https://openalex.org/W2131241448", "https://openalex.org/W2132602793", "https://openalex.org/W2136848732", "https://openalex.org/W2137104525", "https://openalex.org/W2138321920", "https://openalex.org/W2138537392", "https://openalex.org/W2138817206", "https://openalex.org/W2141109918", "https://openalex.org/W2142508340", "https://openalex.org/W2145339207", "https://openalex.org/W2148067905", "https://openalex.org/W2148150594", "https://openalex.org/W2151083897", "https://openalex.org/W2153684665", "https://openalex.org/W2155921792", "https://openalex.org/W2162813238", "https://openalex.org/W2164424353", "https://openalex.org/W2165776394", "https://openalex.org/W2166963499", "https://openalex.org/W2169223808", "https://openalex.org/W2169803171", "https://openalex.org/W2174817438", "https://openalex.org/W2186820913", "https://openalex.org/W2419612459", "https://openalex.org/W2462548332", "https://openalex.org/W2482817076", "https://openalex.org/W2485228835", "https://openalex.org/W2490883651", "https://openalex.org/W2509705549", "https://openalex.org/W2570076534", "https://openalex.org/W2580175322", "https://openalex.org/W2596367596", "https://openalex.org/W2604908377", "https://openalex.org/W2621032660", "https://openalex.org/W2623843378", "https://openalex.org/W2736601468", "https://openalex.org/W2739858255", "https://openalex.org/W2746553466", "https://openalex.org/W2761873684", "https://openalex.org/W2766293931", "https://openalex.org/W2771145196", "https://openalex.org/W2779977383", "https://openalex.org/W2787387965", "https://openalex.org/W2810602713", "https://openalex.org/W2844058595", "https://openalex.org/W2845268560", "https://openalex.org/W2864378606", "https://openalex.org/W2892053860", "https://openalex.org/W2901412188", "https://openalex.org/W2904998808", "https://openalex.org/W2906697496", "https://openalex.org/W2912063360", "https://openalex.org/W2913423706", "https://openalex.org/W2914261249", "https://openalex.org/W2914461813", "https://openalex.org/W2937081431", "https://openalex.org/W2947246097", "https://openalex.org/W2951278471", "https://openalex.org/W2951774549", "https://openalex.org/W2952465248", "https://openalex.org/W2954089099", "https://openalex.org/W2954700257", "https://openalex.org/W2954989419", "https://openalex.org/W2959096608", "https://openalex.org/W2962975462", "https://openalex.org/W2963157343", "https://openalex.org/W2963276097", "https://openalex.org/W2963523627", "https://openalex.org/W2963672746", "https://openalex.org/W2963773324", "https://openalex.org/W2964025389", "https://openalex.org/W2964282857", "https://openalex.org/W2965185450", "https://openalex.org/W2965612550", "https://openalex.org/W2997026295", "https://openalex.org/W3002510907", "https://openalex.org/W3007384386", "https://openalex.org/W3009840839", "https://openalex.org/W3012825518", "https://openalex.org/W3018036994", "https://openalex.org/W3034606848", "https://openalex.org/W3035200689", "https://openalex.org/W3042532592", "https://openalex.org/W3091795298", "https://openalex.org/W3092213025", "https://openalex.org/W3098960920", "https://openalex.org/W3099518626", "https://openalex.org/W3101747404", "https://openalex.org/W3102731759", "https://openalex.org/W3104415870", "https://openalex.org/W3106527723", "https://openalex.org/W3111468295", "https://openalex.org/W3112644834", "https://openalex.org/W3120778962", "https://openalex.org/W3126171498", "https://openalex.org/W3127770912", "https://openalex.org/W3129214046", "https://openalex.org/W3129322645", "https://openalex.org/W3131342066", "https://openalex.org/W3145430300", "https://openalex.org/W3164754954", "https://openalex.org/W3167538233", "https://openalex.org/W3174049743", "https://openalex.org/W3175917212", "https://openalex.org/W3175952800", "https://openalex.org/W3177052905", "https://openalex.org/W3178493357", "https://openalex.org/W3181207417", "https://openalex.org/W3182043095", "https://openalex.org/W3198149465", "https://openalex.org/W3198249268", "https://openalex.org/W3199990163", "https://openalex.org/W3201929238", "https://openalex.org/W3206305573", "https://openalex.org/W3207569066", "https://openalex.org/W3207779294", "https://openalex.org/W3212916397", "https://openalex.org/W3214692953", "https://openalex.org/W4210911782", "https://openalex.org/W4221141793", "https://openalex.org/W4221143659", "https://openalex.org/W4221148168", "https://openalex.org/W4225892134", "https://openalex.org/W4226445998", "https://openalex.org/W4232335189", "https://openalex.org/W4247067162", "https://openalex.org/W4248908364", "https://openalex.org/W4281807998", "https://openalex.org/W4283793632", "https://openalex.org/W4285734657", "https://openalex.org/W4285805326", "https://openalex.org/W4285805433", "https://openalex.org/W4287167426", "https://openalex.org/W4287660295", "https://openalex.org/W4287673640", "https://openalex.org/W4287757595", "https://openalex.org/W4292333472", "https://openalex.org/W4296706038", "https://openalex.org/W4298845604", "https://openalex.org/W4298857966", "https://openalex.org/W4308210960", "https://openalex.org/W4385188943", "https://openalex.org/W4394662461", "https://openalex.org/W4394805110", "https://openalex.org/W6674385629", "https://openalex.org/W6682262322", "https://openalex.org/W6739340459", "https://openalex.org/W6748317118", "https://openalex.org/W6757477279", "https://openalex.org/W6780608970"], "related_works": ["https://openalex.org/W2140186469", "https://openalex.org/W4390421286", "https://openalex.org/W4281847915", "https://openalex.org/W2602382373", "https://openalex.org/W3198113463", "https://openalex.org/W4285827128", "https://openalex.org/W2787698406", "https://openalex.org/W2963844355", "https://openalex.org/W4361251046", "https://openalex.org/W98577079"], "abstract_inverted_index": {"Reinforcement": [0], "learning": [1, 6, 25, 106, 135], "(RL)": [2], "is": [3], "a": [4, 33, 75, 103, 119, 179], "machine": [5], "approach": [7], "that": [8], "trains": [9], "agents": [10], "to": [11, 131, 142], "maximize": [12], "cumulative": [13], "rewards": [14], "through": [15], "interactions": [16], "with": [17, 23, 68], "environments.": [18], "The": [19], "integration": [20], "of": [21, 36, 77, 105, 122, 168, 189, 203], "RL": [22], "deep": [24], "has": [26, 108], "recently": [27], "resulted": [28], "in": [29, 32, 64, 81, 87, 90, 112, 146, 166, 186], "impressive": [30], "achievements": [31], "wide": [34], "range": [35], "challenging": [37], "tasks,": [38], "including": [39, 148], "board": [40], "games,": [41, 43], "arcade": [42], "and": [44, 72, 93, 157, 172, 183, 195, 207, 214], "robot": [45], "control.": [46], "Despite": [47], "these": [48, 114], "successes,": [49], "several": [50], "critical": [51], "challenges": [52, 86, 194], "remain,": [53], "such": [54], "as": [55, 132, 178], "brittle": [56], "convergence": [57], "properties": [58], "caused": [59], "by": [60], "sensitive": [61], "hyperparameters,": [62], "difficulties": [63], "temporal": [65], "credit": [66, 88], "assignment": [67, 89], "long": [69], "time": [70], "horizons": [71], "sparse": [73], "rewards,": [74], "lack": [76], "diverse": [78], "exploration,": [79, 153], "particularly": [80], "continuous": [82], "search": [83], "space": [84], "scenarios,": [85], "multi-agent": [91], "RL,": [92, 129, 147], "conflicting": [94], "objectives": [95], "for": [96, 125, 181, 197, 217], "rewards.": [97], "Evolutionary": [98], "computation": [99], "(EC),": [100], "which": [101], "maintains": [102], "population": [104], "agents,": [107], "demonstrated": [109], "promising": [110, 222], "performance": [111], "addressing": [113], "limitations.": [115], "This": [116, 175], "article": [117], "presents": [118], "comprehensive": [120], "survey": [121, 176], "state-of-the-art": [123], "methods": [124, 140, 213], "integrating": [126], "EC": [127], "into": [128], "referred": [130], "evolutionary": [133], "reinforcement": [134], "(EvoRL).": [136], "We": [137, 160], "categorize": [138], "EvoRL": [139], "according": [141], "key": [143], "research": [144, 164, 224], "areas": [145], "hyperparameter": [149], "optimization,": [150], "policy": [151], "search,": [152], "reward": [154], "shaping,": [155], "meta-RL,": [156], "multi-objective": [158], "RL.": [159], "then": [161], "discuss": [162], "future": [163, 198], "directions": [165], "terms": [167], "efficient": [169, 212], "methods,": [170], "benchmarks,": [171], "scalable": [173], "platforms.": [174], "serves": [177], "resource": [180], "researchers": [182, 206], "practitioners": [184, 208], "interested": [185], "the": [187, 192, 201], "field": [188], "EvoRL,": [190, 218], "highlighting": [191], "important": [193], "opportunities": [196], "research.": [199], "With": [200], "help": [202], "this": [204, 221], "survey,": [205], "can": [209], "develop": [210], "more": [211], "tailored": [215], "benchmarks": [216], "further": [219], "advancing": [220], "cross-disciplinary": [223], "field.": [225]}, "counts_by_year": [{"year": 2026, "cited_by_count": 14}, {"year": 2025, "cited_by_count": 33}, {"year": 2024, "cited_by_count": 28}, {"year": 2023, "cited_by_count": 4}], "updated_date": "2026-07-04T07:58:01.006859", "created_date": "2025-10-10T00:00:00"}], "group_by": []}