{"meta": {"count": 4809, "db_response_time_ms": 487, "page": 1, "per_page": 25, "groups_count": null, "cost_usd": 0.001}, "results": [{"id": "https://openalex.org/W2611243847", "doi": null, "title": "Planning Algorithms", "display_name": "Planning Algorithms", "relevance_score": 750.35516, "publication_year": 2006, "publication_date": "2006-01-01", "ids": {"openalex": "https://openalex.org/W2611243847", "mag": "2611243847"}, "language": "en", "primary_location": {"id": "pmh:oai:CiteSeerX.psu:10.1.1.1.7086", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.7086", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/bookbig.pdf", "raw_type": "text"}, "type": "book", "indexed_in": [], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5065104734", "display_name": "Steven M. LaValle", "orcid": "https://orcid.org/0000-0003-4841-2584"}, "institutions": [], "countries": [], "is_corresponding": true, "raw_author_name": "Steven M. LaValle", "raw_affiliation_strings": [], "raw_orcid": "https://orcid.org/0000-0003-4841-2584", "affiliations": []}], "institutions": [], "countries_distinct_count": 0, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5065104734"], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": 98.1109, "has_fulltext": false, "cited_by_count": 4200, "citation_normalized_percentile": {"value": 1.0, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 91, "max": 100}, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10906", "display_name": "AI-based Problem Solving and Planning", "score": 0.05660000070929527, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10906", "display_name": "AI-based Problem Solving and Planning", "score": 0.05660000070929527, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5103651881217957}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.44396767020225525}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5103651881217957}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.44396767020225525}], "mesh": [], "locations_count": 6, "locations": [{"id": "pmh:oai:CiteSeerX.psu:10.1.1.1.7086", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.7086", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/bookbig.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.1786", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.1786", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/booka4.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.3806", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.3806", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://msl.cs.uiuc.edu/planning/book.pdf", "raw_type": "text"}, {"id": "pmh:oai:aleph.bib-bvb.de:BVB01-014980281", "is_oa": false, "landing_page_url": "http://www.loc.gov/catdir/enhancements/fy0642/2006010125-d.html", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "pmh:oai:cds.cern.ch:991583", "is_oa": false, "landing_page_url": "http://cds.cern.ch/record/991583", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402195", "display_name": "CERN Document Server (European Organization for Nuclear Research)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I67311998", "host_organization_name": "European Organization for Nuclear Research", "host_organization_lineage": ["https://openalex.org/I67311998"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": ""}, {"id": "mag:2611243847", "is_oa": false, "landing_page_url": "https://dl.acm.org/citation.cfm?id=1213331", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": null, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/11", "score": 0.41999998688697815, "display_name": "Sustainable cities and communities"}], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 0, "referenced_works": [], "related_works": ["https://openalex.org/W2336416123", "https://openalex.org/W2313274380", "https://openalex.org/W2169528473", "https://openalex.org/W2159722616", "https://openalex.org/W2141664020", "https://openalex.org/W2128990851", "https://openalex.org/W2122410182", "https://openalex.org/W2110762409", "https://openalex.org/W2103120971", "https://openalex.org/W2098432798", "https://openalex.org/W2036016432", "https://openalex.org/W2000359213", "https://openalex.org/W1971998222", "https://openalex.org/W1971086298", "https://openalex.org/W1969483458", "https://openalex.org/W1521785144", "https://openalex.org/W1516027685", "https://openalex.org/W1424654272", "https://openalex.org/W131069610", "https://openalex.org/W101508493"], "abstract_inverted_index": {"This": [0], "book": [1], "presents": [2], "a": [3], "unified": [4], "treatment": [5], "of": [6, 10], "many": [7], "different": [8], "kinds": [9], "planning": [11, 38], "algorithms.": [12], "The": [13, 29], "subject": [14], "lies": [15], "at": [16], "the": [17], "crossroads": [18], "between": [19], "robotics,": [20], "control": [21], "theory,": [22, 47], "artificial": [23], "intelligence,": [24], "algorithms,": [25], "and": [26, 58], "computer": [27], "graphics.": [28], "particular": [30], "subjects": [31], "covered": [32], "include": [33], "motion": [34], "planning,": [35, 37, 42, 45, 55, 57], "discrete": [36], "under": [39], "uncertainty,": [40], "sensor-based": [41], "visibility,": [43], "decision-theoretic": [44], "game": [46], "information": [48], "spaces,": [49], "reinforcement": [50], "learning,": [51], "nonlinear": [52], "systems,": [53], "trajectory": [54], "nonholonomic": [56], "kinodynamic": [59], "planning.": [60]}, "counts_by_year": [{"year": 2025, "cited_by_count": 1}, {"year": 2023, "cited_by_count": 5}, {"year": 2022, "cited_by_count": 15}, {"year": 2021, "cited_by_count": 210}, {"year": 2020, "cited_by_count": 281}, {"year": 2019, "cited_by_count": 250}, {"year": 2018, "cited_by_count": 226}, {"year": 2017, "cited_by_count": 212}, {"year": 2016, "cited_by_count": 246}, {"year": 2015, "cited_by_count": 333}, {"year": 2014, "cited_by_count": 307}, {"year": 2013, "cited_by_count": 325}, {"year": 2012, "cited_by_count": 302}], "updated_date": "2025-11-06T04:12:42.849631", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2973229164", "doi": "https://doi.org/10.32657/10356/90191", "title": "Continuous control for robot based on deep reinforcement learning", "display_name": "Continuous control for robot based on deep reinforcement learning", "relevance_score": 424.46716, "publication_year": 2019, "publication_date": "2019-01-01", "ids": {"openalex": "https://openalex.org/W2973229164", "doi": "https://doi.org/10.32657/10356/90191", "mag": "2973229164"}, "language": "en", "primary_location": {"id": "doi:10.32657/10356/90191", "is_oa": true, "landing_page_url": "https://doi.org/10.32657/10356/90191", "pdf_url": "https://dr.ntu.edu.sg/bitstream/10356/90191/1/thesis-final.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Nanyang Technological University", "raw_type": "dissertation"}, "type": "dissertation", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://dr.ntu.edu.sg/bitstream/10356/90191/1/thesis-final.pdf", "any_repository_has_fulltext": null}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5070751669", "display_name": "Shansi Zhang", "orcid": null}, "institutions": [], "countries": [], "is_corresponding": true, "raw_author_name": "Zhang, Shansi", "raw_affiliation_strings": [], "raw_orcid": null, "affiliations": []}], "institutions": [], "countries_distinct_count": 0, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5070751669"], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": true, "cited_by_count": 933, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T14225", "display_name": "Advanced Sensor and Control Systems", "score": 0.6629999876022339, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T14225", "display_name": "Advanced Sensor and Control Systems", "score": 0.6629999876022339, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T13717", "display_name": "Advanced Algorithms and Applications", "score": 0.6290000081062317, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8376568555831909}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5035085082054138}, {"id": "https://openalex.org/keywords/reinforcement", "display_name": "Reinforcement", "score": 0.47970980405807495}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.47445306181907654}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.46188730001449585}, {"id": "https://openalex.org/keywords/human\u2013computer-interaction", "display_name": "Human\u2013computer interaction", "score": 0.3435513973236084}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.2595599889755249}, {"id": "https://openalex.org/keywords/structural-engineering", "display_name": "Structural engineering", "score": 0.04524645209312439}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8376568555831909}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5035085082054138}, {"id": "https://openalex.org/C67203356", "wikidata": "https://www.wikidata.org/wiki/Q1321905", "display_name": "Reinforcement", "level": 2, "score": 0.47970980405807495}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.47445306181907654}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.46188730001449585}, {"id": "https://openalex.org/C107457646", "wikidata": "https://www.wikidata.org/wiki/Q207434", "display_name": "Human\u2013computer interaction", "level": 1, "score": 0.3435513973236084}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.2595599889755249}, {"id": "https://openalex.org/C66938386", "wikidata": "https://www.wikidata.org/wiki/Q633538", "display_name": "Structural engineering", "level": 1, "score": 0.04524645209312439}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.32657/10356/90191", "is_oa": true, "landing_page_url": "https://doi.org/10.32657/10356/90191", "pdf_url": "https://dr.ntu.edu.sg/bitstream/10356/90191/1/thesis-final.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Nanyang Technological University", "raw_type": "dissertation"}, {"id": "pmh:oai:dr.ntu.edu.sg:10356/90191", "is_oa": false, "landing_page_url": "https://hdl.handle.net/10356/90191", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402609", "display_name": "DR-NTU (Nanyang Technological University)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I172675005", "host_organization_name": "Nanyang Technological University", "host_organization_lineage": ["https://openalex.org/I172675005"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "Thesis"}], "best_oa_location": {"id": "doi:10.32657/10356/90191", "is_oa": true, "landing_page_url": "https://doi.org/10.32657/10356/90191", "pdf_url": "https://dr.ntu.edu.sg/bitstream/10356/90191/1/thesis-final.pdf", "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Nanyang Technological University", "raw_type": "dissertation"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2973229164.pdf", "grobid_xml": "https://content.openalex.org/works/W2973229164.grobid-xml"}, "referenced_works_count": 108, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W57318391", "https://openalex.org/W567721252", "https://openalex.org/W1191599655", "https://openalex.org/W1522301498", "https://openalex.org/W1574474448", "https://openalex.org/W1658008008", "https://openalex.org/W1771410628", "https://openalex.org/W1810943226", "https://openalex.org/W1836465849", "https://openalex.org/W2121092017", "https://openalex.org/W2145339207", "https://openalex.org/W2155027007", "https://openalex.org/W2155968351", "https://openalex.org/W2173248099", "https://openalex.org/W2173564293", "https://openalex.org/W2201581102", "https://openalex.org/W2257979135", "https://openalex.org/W2260756217", "https://openalex.org/W2298546714", "https://openalex.org/W2343568200", "https://openalex.org/W2411690432", "https://openalex.org/W2509374375", "https://openalex.org/W2522340145", "https://openalex.org/W2567061106", "https://openalex.org/W2575705757", "https://openalex.org/W2592873849", "https://openalex.org/W2593766708", "https://openalex.org/W2605102758", "https://openalex.org/W2606508169", "https://openalex.org/W2610395436", "https://openalex.org/W2614839826", "https://openalex.org/W2620671107", "https://openalex.org/W2626580042", "https://openalex.org/W2724169821", "https://openalex.org/W2726187156", "https://openalex.org/W2736601468", "https://openalex.org/W2739473244", "https://openalex.org/W2746553466", "https://openalex.org/W2749928749", "https://openalex.org/W2754000639", "https://openalex.org/W2756826236", "https://openalex.org/W2757631751", "https://openalex.org/W2761873684", "https://openalex.org/W2766447205", "https://openalex.org/W2770892919", "https://openalex.org/W2781726626", "https://openalex.org/W2786036274", "https://openalex.org/W2786928559", "https://openalex.org/W2787613197", "https://openalex.org/W2787938642", "https://openalex.org/W2788781499", "https://openalex.org/W2798472928", "https://openalex.org/W2798705390", "https://openalex.org/W2811329274", "https://openalex.org/W2823112946", "https://openalex.org/W2869375357", "https://openalex.org/W2885163910", "https://openalex.org/W2885550588", "https://openalex.org/W2887927966", "https://openalex.org/W2891076394", "https://openalex.org/W2895453875", "https://openalex.org/W2896066033", "https://openalex.org/W2951799221", "https://openalex.org/W2962887844", "https://openalex.org/W2962977206", "https://openalex.org/W2963095800", "https://openalex.org/W2963099939", "https://openalex.org/W2963211300", "https://openalex.org/W2963411833", "https://openalex.org/W2963428623", "https://openalex.org/W2963523627", "https://openalex.org/W2963674921", "https://openalex.org/W2963729305", "https://openalex.org/W2963780790", "https://openalex.org/W2963864421", "https://openalex.org/W2964043796", "https://openalex.org/W2990747716", "https://openalex.org/W3100944043", "https://openalex.org/W3104515094", "https://openalex.org/W4244566287", "https://openalex.org/W4289422208", "https://openalex.org/W4289763054", "https://openalex.org/W4293542549", "https://openalex.org/W4293864724", "https://openalex.org/W4297791094", "https://openalex.org/W4297795161", "https://openalex.org/W4297797010", "https://openalex.org/W4298857966", "https://openalex.org/W4298876402", "https://openalex.org/W4300799055", "https://openalex.org/W4302570325", "https://openalex.org/W6616173779", "https://openalex.org/W6637967152", "https://openalex.org/W6638018090", "https://openalex.org/W6684205842", "https://openalex.org/W6687681856", "https://openalex.org/W6730111887", "https://openalex.org/W6731334075", "https://openalex.org/W6740801417", "https://openalex.org/W6741002519", "https://openalex.org/W6745620495", "https://openalex.org/W6748554570", "https://openalex.org/W6753060773", "https://openalex.org/W6753925943", "https://openalex.org/W6754665250", "https://openalex.org/W6754957883", "https://openalex.org/W6891797237"], "related_works": ["https://openalex.org/W2920061524", "https://openalex.org/W4310083477", "https://openalex.org/W1977959518", "https://openalex.org/W2038908348", "https://openalex.org/W2107890255", "https://openalex.org/W2106552856", "https://openalex.org/W2089013912", "https://openalex.org/W2076061571", "https://openalex.org/W1987513656", "https://openalex.org/W2145821588"], "abstract_inverted_index": {"One": [0], "of": [1, 5, 23], "the": [2, 11, 21, 34, 38], "main": [3], "targets": [4], "artificial": [6], "intelligence": [7], "is": [8], "to": [9, 57], "solve": [10], "complex": [12], "control": [13], "problems": [14], "which": [15], "have": [16], "high-dimensional": [17], "observation": [18], "spaces.": [19, 65], "Recently,": [20], "combination": [22], "deep": [24, 51], "learning": [25, 28, 53], "and": [26, 40, 45], "reinforcement": [27, 52], "has": [29], "made": [30], "remarkable": [31], "progress,": [32], "including": [33], "high-level": [35], "performance": [36], "in": [37], "video": [39], "board": [41], "games,": [42], "3D": [43], "navigations": [44], "robotic": [46, 60], "control.": [47], "In": [48], "this": [49], "thesis,": [50], "algorithms": [54], "are": [55], "studied": [56], "perform": [58], "some": [59], "tasks": [61], "with": [62], "continuous": [63], "action": [64]}, "counts_by_year": [{"year": 2026, "cited_by_count": 16}, {"year": 2025, "cited_by_count": 285}, {"year": 2024, "cited_by_count": 300}, {"year": 2023, "cited_by_count": 197}, {"year": 2022, "cited_by_count": 88}, {"year": 2021, "cited_by_count": 42}, {"year": 2020, "cited_by_count": 5}], "updated_date": "2026-05-13T08:25:38.343686", "created_date": "2019-09-19T00:00:00"}, {"id": "https://openalex.org/W4327571609", "doi": "https://doi.org/10.1016/j.artint.2023.103905", "title": "Safe multi-agent reinforcement learning for multi-robot control", "display_name": "Safe multi-agent reinforcement learning for multi-robot control", "relevance_score": 413.42615, "publication_year": 2023, "publication_date": "2023-03-16", "ids": {"openalex": "https://openalex.org/W4327571609", "doi": "https://doi.org/10.1016/j.artint.2023.103905"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "hybrid", "oa_url": "https://doi.org/10.1016/j.artint.2023.103905", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5062677595", "display_name": "Shangding Gu", "orcid": "https://orcid.org/0000-0002-2722-3779"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}, {"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["CN", "DE"], "is_corresponding": false, "raw_author_name": "Shangding Gu", "raw_affiliation_strings": ["Department of Computer Science, Technical University of Munich, Germany", "Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": "https://orcid.org/0000-0002-2722-3779", "affiliations": [{"raw_affiliation_string": "Department of Computer Science, Technical University of Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}, {"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5038889758", "display_name": "Jakub Grudzien Kuba", "orcid": null}, "institutions": [{"id": "https://openalex.org/I40120149", "display_name": "University of Oxford", "ror": "https://ror.org/052gg0110", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I40120149"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Jakub Grudzien Kuba", "raw_affiliation_strings": ["Department of Statistics, University of Oxford, UK"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Statistics, University of Oxford, UK", "institution_ids": ["https://openalex.org/I40120149"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5102989564", "display_name": "Yuanpei Chen", "orcid": "https://orcid.org/0000-0002-4674-553X"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yuanpei Chen", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5002080576", "display_name": "Yali Du", "orcid": "https://orcid.org/0000-0001-5683-2621"}, "institutions": [{"id": "https://openalex.org/I183935753", "display_name": "King's College London", "ror": "https://ror.org/0220mzb33", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I124357947", "https://openalex.org/I183935753"]}], "countries": ["GB"], "is_corresponding": false, "raw_author_name": "Yali Du", "raw_affiliation_strings": ["Department of Informatics, King's College London, UK"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Informatics, King's College London, UK", "institution_ids": ["https://openalex.org/I183935753"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5069283448", "display_name": "Yang Long", "orcid": "https://orcid.org/0000-0001-7600-3396"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Long Yang", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5063781430", "display_name": "Alois Knoll", "orcid": "https://orcid.org/0000-0003-4840-076X"}, "institutions": [{"id": "https://openalex.org/I62916508", "display_name": "Technical University of Munich", "ror": "https://ror.org/02kkvpp62", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I62916508"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Alois Knoll", "raw_affiliation_strings": ["Department of Computer Science, Technical University of Munich, Germany"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Computer Science, Technical University of Munich, Germany", "institution_ids": ["https://openalex.org/I62916508"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5090073634", "display_name": "Yaodong Yang", "orcid": "https://orcid.org/0000-0001-8132-5613"}, "institutions": [{"id": "https://openalex.org/I20231570", "display_name": "Peking University", "ror": "https://ror.org/02v51f717", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I20231570"]}, {"id": "https://openalex.org/I4210100255", "display_name": "Beijing Academy of Artificial Intelligence", "ror": "https://ror.org/016a74861", "country_code": "CN", "type": "other", "lineage": ["https://openalex.org/I4210100255"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Yaodong Yang", "raw_affiliation_strings": ["Institute for Artificial Intelligence, Peking University, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Artificial Intelligence, Peking University, China", "institution_ids": ["https://openalex.org/I4210100255", "https://openalex.org/I20231570"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 7, "corresponding_author_ids": ["https://openalex.org/A5090073634"], "corresponding_institution_ids": ["https://openalex.org/I20231570", "https://openalex.org/I4210100255"], "apc_list": {"value": 3670, "currency": "USD", "value_usd": 3670}, "apc_paid": {"value": 3670, "currency": "USD", "value_usd": 3670}, "fwci": 19.4267, "has_fulltext": false, "cited_by_count": 117, "citation_normalized_percentile": {"value": 0.99556028, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 100}, "biblio": {"volume": "319", "issue": null, "first_page": "103905", "last_page": "103905"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998999834060669, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9901000261306763, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9839000105857849, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.883181095123291}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6126446723937988}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5663607716560364}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.5441109538078308}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.48300397396087646}, {"id": "https://openalex.org/keywords/state", "display_name": "State (computer science)", "score": 0.449796199798584}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.44975346326828003}, {"id": "https://openalex.org/keywords/marl", "display_name": "Marl", "score": 0.44429540634155273}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.386458158493042}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.17244932055473328}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.13461393117904663}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.883181095123291}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6126446723937988}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5663607716560364}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.5441109538078308}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.48300397396087646}, {"id": "https://openalex.org/C48103436", "wikidata": "https://www.wikidata.org/wiki/Q599031", "display_name": "State (computer science)", "level": 2, "score": 0.449796199798584}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.44975346326828003}, {"id": "https://openalex.org/C92927620", "wikidata": "https://www.wikidata.org/wiki/Q184053", "display_name": "Marl", "level": 3, "score": 0.44429540634155273}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.386458158493042}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.17244932055473328}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.13461393117904663}, {"id": "https://openalex.org/C151730666", "wikidata": "https://www.wikidata.org/wiki/Q7205", "display_name": "Paleontology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C109007969", "wikidata": "https://www.wikidata.org/wiki/Q749565", "display_name": "Structural basin", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, {"id": "pmh:oai:kclpure.kcl.ac.uk:openaire/4b6e2578-0b6d-455a-84f9-b317b80838cc", "is_oa": true, "landing_page_url": "https://kclpure.kcl.ac.uk/portal/en/publications/4b6e2578-0b6d-455a-84f9-b317b80838cc", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400216", "display_name": "Research Portal (King's College London)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I183935753", "host_organization_name": "King's College London", "host_organization_lineage": ["https://openalex.org/I183935753"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Gu, S, Grudzien Kuba, J, Chen, Y, Du, Y, Yang, L, Knoll, A & Yang, Y 2023, 'Safe multi-agent reinforcement learning for multi-robot control', ARTIFICIAL INTELLIGENCE, vol. 319, 103905. https://doi.org/10.1016/j.artint.2023.103905", "raw_type": "info:eu-repo/semantics/publishedVersion"}, {"id": "pmh:oai:mediatum.ub.tum.de:node/1702843", "is_oa": false, "landing_page_url": "https://mediatum.ub.tum.de/1702843", "pdf_url": null, "source": {"id": "https://openalex.org/S4377196330", "display_name": "mediaTUM  (Technical University of Munich)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I62916508", "host_organization_name": "Technical University of Munich", "host_organization_lineage": ["https://openalex.org/I62916508"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "article"}], "best_oa_location": {"id": "doi:10.1016/j.artint.2023.103905", "is_oa": true, "landing_page_url": "https://doi.org/10.1016/j.artint.2023.103905", "pdf_url": null, "source": {"id": "https://openalex.org/S196139623", "display_name": "Artificial Intelligence", "issn_l": "0004-3702", "issn": ["0004-3702", "1872-7921"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Artificial Intelligence", "raw_type": "journal-article"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/17", "score": 0.44999998807907104, "display_name": "Partnerships for the goals"}], "awards": [{"id": "https://openalex.org/G2730713857", "display_name": null, "funder_award_id": "945539", "funder_id": "https://openalex.org/F4320335254", "funder_display_name": "Horizon 2020"}, {"id": "https://openalex.org/G3506733245", "display_name": null, "funder_award_id": "Z221100003422004", "funder_id": "https://openalex.org/F4320325902", "funder_display_name": "Beijing Municipal Science and Technology Commission"}], "funders": [{"id": "https://openalex.org/F4320325902", "display_name": "Beijing Municipal Science and Technology Commission", "ror": null}, {"id": "https://openalex.org/F4320335254", "display_name": "Horizon 2020", "ror": null}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 47, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W1845972764", "https://openalex.org/W2040871222", "https://openalex.org/W2050835671", "https://openalex.org/W2128410140", "https://openalex.org/W2234962923", "https://openalex.org/W2257979135", "https://openalex.org/W2291649624", "https://openalex.org/W2560504659", "https://openalex.org/W2604213426", "https://openalex.org/W2915117209", "https://openalex.org/W2950430092", "https://openalex.org/W2954295423", "https://openalex.org/W3032916997", "https://openalex.org/W3048871122", "https://openalex.org/W3157410348", "https://openalex.org/W3162902207", "https://openalex.org/W3173294282", "https://openalex.org/W3195968524", "https://openalex.org/W3196692929", "https://openalex.org/W3200561352", "https://openalex.org/W3201466613", "https://openalex.org/W4233813419", "https://openalex.org/W4250331344", "https://openalex.org/W4292313830", "https://openalex.org/W4294562617", "https://openalex.org/W4310895557", "https://openalex.org/W6638018090", "https://openalex.org/W6676896484", "https://openalex.org/W6682367392", "https://openalex.org/W6687063787", "https://openalex.org/W6737893269", "https://openalex.org/W6748239807", "https://openalex.org/W6749304979", "https://openalex.org/W6751535212", "https://openalex.org/W6758846586", "https://openalex.org/W6764658791", "https://openalex.org/W6784856472", "https://openalex.org/W6788898170", "https://openalex.org/W6793975612", "https://openalex.org/W6794308622", "https://openalex.org/W6797795308", "https://openalex.org/W6800004206", "https://openalex.org/W6800526921", "https://openalex.org/W6801514172", "https://openalex.org/W6802610412", "https://openalex.org/W6843754643"], "related_works": ["https://openalex.org/W2126019709", "https://openalex.org/W1702901972", "https://openalex.org/W4249798507", "https://openalex.org/W2069775250", "https://openalex.org/W2093541819", "https://openalex.org/W2032294417", "https://openalex.org/W2152754392", "https://openalex.org/W1989172970", "https://openalex.org/W2196316523", "https://openalex.org/W1899363654"], "abstract_inverted_index": {"A": [0], "challenging": [1], "problem": [2, 87, 221], "in": [3, 14, 39, 52, 107, 193], "robotics": [4], "is": [5, 227, 236], "how": [6], "to": [7, 58, 76, 97, 120, 165], "control": [8, 20, 48, 173], "multiple": [9, 277], "robots": [10], "cooperatively": [11], "and": [12, 93, 109, 137, 158, 171, 199, 208, 245, 267, 285], "safely": [13], "real-world": [15], "applications.": [16], "Yet,": [17], "developing": [18], "multi-robot": [19, 47], "methods": [21, 188], "from": [22], "the": [23, 84, 121, 143, 167, 180, 194, 213, 281], "perspective": [24], "of": [25, 74, 111, 169, 222, 283], "safe": [26, 44, 78, 85, 127, 146, 182, 223, 252], "multi-agent": [27, 128, 224], "reinforcement": [28, 225], "learning": [29, 226], "(MARL)": [30], "has": [31, 57], "merely": [32], "been": [33], "studied.": [34], "To": [35], "fill": [36], "this": [37, 40], "gap,": [38], "study,": [41], "we": [42, 82, 124, 141], "investigate": [43], "MARL": [45, 86, 147, 170, 183, 253], "for": [46], "on": [49, 179, 276], "cooperative": [50], "tasks,": [51], "which": [53], "each": [54], "individual": [55], "robot": [56, 172], "not": [59], "only": [60], "meet": [61], "its": [62], "own": [63], "safety": [64, 112, 201, 242], "constraints": [65, 113, 202, 243], "while": [66], "maximising": [67], "their": [68], "reward,": [69], "but": [70], "also": [71], "consider": [72], "those": [73], "others": [75], "guarantee": [77], "team": [79], "behaviours.": [80], "Firstly,": [81], "formulate": [83], "as": [88, 118], "a": [89], "constrained": [90, 231], "Markov": [91], "game": [92], "employ": [94], "policy": [95, 129, 232], "optimisation": [96, 233], "solve": [98], "it": [99], "theoretically.": [100], "The": [101, 220], "proposed": [102], "algorithm": [103], "guarantees": [104], "monotonic": [105, 246], "improvement": [106, 248], "reward": [108, 198], "satisfaction": [110, 244], "at": [114, 212], "every": [115], "iteration.": [116], "Secondly,": [117], "approximations": [119], "theoretical": [122], "solution,": [123], "propose": [125], "two": [126], "gradient": [130], "methods:": [131], "Multi-Agent": [132, 149, 154, 160, 258, 263, 269], "Constrained": [133], "Policy": [134], "Optimisation": [135], "(MACPO)": [136, 234], "MAPPO-Lagrangian": [138], ".": [139], "Thirdly,": [140], "develop": [142], "first": [144], "three": [145, 181], "benchmarks\u2014Safe": [148], "MuJoCo": [150, 259], "(Safe": [151, 156, 163, 260, 265, 272], "MAMuJoCo),": [152, 261], "Safe": [153, 159, 257, 262, 268], "Robosuite": [155, 264], "MARobosuite)": [157, 266], "Isaac": [161, 270], "Gym": [162, 271], "MAIG)": [164], "expand": [166], "toolkit": [168], "research": [174], "communities.": [175], "Finally,": [176], "experimental": [177], "results": [178], "benchmarks": [184, 254], "indicate": [185], "that": [186], "our": [187], "can": [189], "achieve": [190], "state-of-the-art": [191], "performance": [192, 247], "balance": [195], "between": [196], "improving": [197], "satisfying": [200], "compared": [203], "with": [204], "strong": [205], "baselines.": [206], "Demos": [207], "code": [209], "are": [210, 255], "available": [211], "link": [214], "(": [215], "https://sites.google.com/view/aij-safe-marl/": [216], ").": [217], "2": [218], "\u2022": [219, 229, 238, 250, 274], "formulated.": [228], "Multi-agent": [230], "method": [235], "proposed.": [237], "MACPO": [239, 284], "ensures": [240], "both": [241], "guarantee.": [249], "Three": [251], "developed:": [256], "MAIG).": [273], "Experiments": [275], "benchmark": [278], "environments": [279], "confirm": [280], "effectiveness": [282], "MAPPO-Lagrangian.": [286]}, "counts_by_year": [{"year": 2026, "cited_by_count": 16}, {"year": 2025, "cited_by_count": 59}, {"year": 2024, "cited_by_count": 39}, {"year": 2023, "cited_by_count": 3}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2902125520", "doi": "https://doi.org/10.48550/arxiv.1812.00568", "title": "Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control", "display_name": "Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control", "relevance_score": 372.24747, "publication_year": 2018, "publication_date": "2018-12-03", "ids": {"openalex": "https://openalex.org/W2902125520", "doi": "https://doi.org/10.48550/arxiv.1812.00568", "mag": "2902125520"}, "language": "en", "primary_location": {"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, "type": "preprint", "indexed_in": ["arxiv", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1812.00568", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5029017885", "display_name": "Frederik Ebert", "orcid": null}, "institutions": [], "countries": [], "is_corresponding": true, "raw_author_name": "Ebert, Frederik", "raw_affiliation_strings": [], "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5005431772", "display_name": "Chelsea Finn", "orcid": "https://orcid.org/0000-0001-6298-0874"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Finn, Chelsea", "raw_affiliation_strings": [], "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5069471785", "display_name": "Sudeep Dasari", "orcid": "https://orcid.org/0000-0003-2600-2779"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Dasari, Sudeep", "raw_affiliation_strings": [], "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5082350181", "display_name": "Annie Xie", "orcid": "https://orcid.org/0000-0003-1736-3775"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Xie, Annie", "raw_affiliation_strings": [], "affiliations": []}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5027666200", "display_name": "Alex X. Lee", "orcid": null}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Lee, Alex", "raw_affiliation_strings": [], "affiliations": []}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Levine, Sergey", "raw_affiliation_strings": [], "affiliations": []}], "institutions": [], "countries_distinct_count": 0, "institutions_distinct_count": 6, "corresponding_author_ids": ["https://openalex.org/A5029017885"], "corresponding_institution_ids": [], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": false, "cited_by_count": 264, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9991000294685364, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10502", "display_name": "Advanced Memory and Neural Computing", "score": 0.9891999959945679, "subfield": {"id": "https://openalex.org/subfields/2208", "display_name": "Electrical and Electronic Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.8180503845214844}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7615313529968262}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7216188907623291}, {"id": "https://openalex.org/keywords/generalization", "display_name": "Generalization", "score": 0.62183678150177}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.5961755514144897}, {"id": "https://openalex.org/keywords/pixel", "display_name": "Pixel", "score": 0.5803840756416321}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5160279273986816}, {"id": "https://openalex.org/keywords/object", "display_name": "Object (grammar)", "score": 0.5149974822998047}, {"id": "https://openalex.org/keywords/computer-vision", "display_name": "Computer vision", "score": 0.507150411605835}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.47004857659339905}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.3961804211139679}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.10632431507110596}], "concepts": [{"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.8180503845214844}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7615313529968262}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7216188907623291}, {"id": "https://openalex.org/C177148314", "wikidata": "https://www.wikidata.org/wiki/Q170084", "display_name": "Generalization", "level": 2, "score": 0.62183678150177}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.5961755514144897}, {"id": "https://openalex.org/C160633673", "wikidata": "https://www.wikidata.org/wiki/Q355198", "display_name": "Pixel", "level": 2, "score": 0.5803840756416321}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5160279273986816}, {"id": "https://openalex.org/C2781238097", "wikidata": "https://www.wikidata.org/wiki/Q175026", "display_name": "Object (grammar)", "level": 2, "score": 0.5149974822998047}, {"id": "https://openalex.org/C31972630", "wikidata": "https://www.wikidata.org/wiki/Q844240", "display_name": "Computer vision", "level": 1, "score": 0.507150411605835}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.47004857659339905}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.3961804211139679}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.10632431507110596}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, {"id": "doi:10.48550/arxiv.1812.00568", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1812.00568", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "article"}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1812.00568", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.00568", "pdf_url": "https://arxiv.org/pdf/1812.00568", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": null}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/4", "score": 0.7200000286102295, "display_name": "Quality Education"}], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 42, "referenced_works": ["https://openalex.org/W1522301498", "https://openalex.org/W1757796397", "https://openalex.org/W1977687214", "https://openalex.org/W1984383922", "https://openalex.org/W2012587148", "https://openalex.org/W2131600418", "https://openalex.org/W2140135625", "https://openalex.org/W2158782408", "https://openalex.org/W2167089254", "https://openalex.org/W2201912979", "https://openalex.org/W2281096776", "https://openalex.org/W2338684808", "https://openalex.org/W2400532028", "https://openalex.org/W2410156224", "https://openalex.org/W2473208550", "https://openalex.org/W2502312327", "https://openalex.org/W2528489519", "https://openalex.org/W2586067474", "https://openalex.org/W2592285981", "https://openalex.org/W2594961016", "https://openalex.org/W2606047872", "https://openalex.org/W2742605348", "https://openalex.org/W2765349170", "https://openalex.org/W2765994785", "https://openalex.org/W2770257943", "https://openalex.org/W2772709170", "https://openalex.org/W2796303840", "https://openalex.org/W2892490014", "https://openalex.org/W2949099979", "https://openalex.org/W2949962649", "https://openalex.org/W2951553872", "https://openalex.org/W2951748364", "https://openalex.org/W2951775809", "https://openalex.org/W2952578114", "https://openalex.org/W2953118818", "https://openalex.org/W2962736495", "https://openalex.org/W2962793652", "https://openalex.org/W2963125871", "https://openalex.org/W2963430173", "https://openalex.org/W2963435596", "https://openalex.org/W2963629403", "https://openalex.org/W2963634205"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W3162204513", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W2031695474", "https://openalex.org/W2024136090", "https://openalex.org/W2586732548", "https://openalex.org/W3049728571", "https://openalex.org/W2964765435"], "abstract_inverted_index": {"Deep": [0], "reinforcement": [1], "learning": [2, 29], "(RL)": [3], "algorithms": [4], "can": [5, 175], "learn": [6], "complex": [7], "robotic": [8, 49], "skills": [9], "from": [10, 87], "raw": [11, 88], "sensory": [12, 89], "inputs,": [13], "but": [14], "have": [15], "yet": [16], "to": [17, 54, 82, 177], "achieve": [18], "the": [19, 85, 129, 192], "kind": [20], "of": [21, 144, 165, 186], "broad": [22], "generalization": [23], "and": [24, 51, 57, 69, 122, 138, 157, 181], "applicability": [25], "demonstrated": [26], "by": [27, 115, 159], "deep": [28, 37, 148], "methods": [30], "in": [31, 119], "supervised": [32], "domains.": [33], "We": [34, 170], "present": [35], "a": [36, 73, 78, 108, 160, 184], "RL": [38], "method": [39], "that": [40, 172], "is": [41, 133], "practical": [42], "for": [43], "real-world": [44], "robotics": [45], "tasks,": [46], "such": [47, 91], "as": [48, 92], "manipulation,": [50], "generalizes": [52], "effectively": [53], "never-before-seen": [55, 178], "tasks": [56, 114, 190], "objects.": [58], "In": [59], "these": [60], "settings,": [61], "ground": [62], "truth": [63], "reward": [64], "signals": [65], "are": [66, 151], "typically": [67], "unavailable,": [68], "we": [70, 98], "therefore": [71], "propose": [72], "self-supervised": [74], "model-based": [75], "approach,": [76], "where": [77, 107, 128], "predictive": [79, 149], "model": [80], "learns": [81], "directly": [83], "predict": [84], "future": [86], "readings,": [90], "camera": [93], "images.": [94], "At": [95], "test": [96], "time,": [97], "explore": [99], "three": [100], "distinct": [101], "goal": [102, 124, 126, 131, 145], "specification": [103], "methods:": [104], "designated": [105], "pixels,": [106], "user": [109], "specifies": [110], "desired": [111, 130], "object": [112, 188], "manipulation": [113, 189], "selecting": [116], "particular": [117], "pixels": [118], "an": [120, 136], "image": [121, 139], "corresponding": [123], "positions,": [125], "images,": [127], "state": [132], "specified": [134], "with": [135, 163], "image,": [137], "classifiers,": [140], "which": [141], "define": [142], "spaces": [143], "states.": [146], "Our": [147], "models": [150], "trained": [152], "using": [153, 191], "data": [154], "collected": [155], "autonomously": [156], "continuously": [158], "robot": [161], "interacting": [162], "hundreds": [164], "objects,": [166], "without": [167], "human": [168], "supervision.": [169], "demonstrate": [171], "visual": [173], "MPC": [174], "generalize": [176], "objects---both": [179], "rigid": [180], "deformable---and": [182], "solve": [183], "range": [185], "user-defined": [187], "same": [193], "model.": [194]}, "counts_by_year": [{"year": 2026, "cited_by_count": 1}, {"year": 2025, "cited_by_count": 5}, {"year": 2024, "cited_by_count": 22}, {"year": 2023, "cited_by_count": 39}, {"year": 2022, "cited_by_count": 26}, {"year": 2021, "cited_by_count": 74}, {"year": 2020, "cited_by_count": 67}, {"year": 2019, "cited_by_count": 29}, {"year": 2018, "cited_by_count": 1}], "updated_date": "2026-04-12T07:58:50.170612", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W1525280637", "doi": "https://doi.org/10.1109/icra.2012.6225072", "title": "RTMBA: A Real-Time Model-Based Reinforcement Learning Architecture for robot control", "display_name": "RTMBA: A Real-Time Model-Based Reinforcement Learning Architecture for robot control", "relevance_score": 350.86353, "publication_year": 2012, "publication_date": "2012-05-01", "ids": {"openalex": "https://openalex.org/W1525280637", "doi": "https://doi.org/10.1109/icra.2012.6225072", "mag": "1525280637"}, "language": "en", "primary_location": {"id": "doi:10.1109/icra.2012.6225072", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2012.6225072", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2012 IEEE International Conference on Robotics and Automation", "raw_type": "proceedings-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5048229171", "display_name": "Todd Hester", "orcid": null}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "Todd Hester", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5074285715", "display_name": "Michael Quinlan", "orcid": null}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Michael Quinlan", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5001594330", "display_name": "Peter Stone", "orcid": "https://orcid.org/0000-0002-6795-420X"}, "institutions": [{"id": "https://openalex.org/I86519309", "display_name": "The University of Texas at Austin", "ror": "https://ror.org/00hj54h04", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I86519309"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Peter Stone", "raw_affiliation_strings": ["Department of Computer Science, University of Texas, Austin, USA", "Department of Computer Science, The University of Texas at Austin, USA,"], "affiliations": [{"raw_affiliation_string": "Department of Computer Science, University of Texas, Austin, USA", "institution_ids": ["https://openalex.org/I86519309"]}, {"raw_affiliation_string": "Department of Computer Science, The University of Texas at Austin, USA,", "institution_ids": ["https://openalex.org/I86519309"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 3, "corresponding_author_ids": ["https://openalex.org/A5048229171"], "corresponding_institution_ids": ["https://openalex.org/I86519309"], "apc_list": null, "apc_paid": null, "fwci": 8.8425, "has_fulltext": false, "cited_by_count": 86, "citation_normalized_percentile": {"value": 0.97636499, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 94, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": "85", "last_page": "90"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.8855999708175659, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.8855999708175659, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.02070000022649765, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.019999999552965164, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8828479051589966}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.810511589050293}, {"id": "https://openalex.org/keywords/architecture", "display_name": "Architecture", "score": 0.6434780359268188}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6390858888626099}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.592674195766449}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.5577608346939087}, {"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.5256758332252502}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49630218744277954}, {"id": "https://openalex.org/keywords/action", "display_name": "Action (physics)", "score": 0.4356013834476471}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.36971479654312134}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.3013526201248169}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8828479051589966}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.810511589050293}, {"id": "https://openalex.org/C123657996", "wikidata": "https://www.wikidata.org/wiki/Q12271", "display_name": "Architecture", "level": 2, "score": 0.6434780359268188}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6390858888626099}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.592674195766449}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.5577608346939087}, {"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.5256758332252502}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49630218744277954}, {"id": "https://openalex.org/C2780791683", "wikidata": "https://www.wikidata.org/wiki/Q846785", "display_name": "Action (physics)", "level": 2, "score": 0.4356013834476471}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.36971479654312134}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.3013526201248169}, {"id": "https://openalex.org/C153349607", "wikidata": "https://www.wikidata.org/wiki/Q36649", "display_name": "Visual arts", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C142362112", "wikidata": "https://www.wikidata.org/wiki/Q735", "display_name": "Art", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C62520636", "wikidata": "https://www.wikidata.org/wiki/Q944", "display_name": "Quantum mechanics", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.1109/icra.2012.6225072", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2012.6225072", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2012 IEEE International Conference on Robotics and Automation", "raw_type": "proceedings-article"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.221.3908", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.3908", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://www.cs.utexas.edu/%7Epstone/Papers/bib2html-links/ICRA12-hester.pdf", "raw_type": "text"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.765.4538", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.765.4538", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://arxiv.org/pdf/1105.1749.pdf", "raw_type": "text"}], "best_oa_location": null, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.6499999761581421, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 31, "referenced_works": ["https://openalex.org/W1491843047", "https://openalex.org/W1515308897", "https://openalex.org/W1560495008", "https://openalex.org/W1590157819", "https://openalex.org/W1595483645", "https://openalex.org/W1625390266", "https://openalex.org/W2097778153", "https://openalex.org/W2103048296", "https://openalex.org/W2108734173", "https://openalex.org/W2116695029", "https://openalex.org/W2121863487", "https://openalex.org/W2132622533", "https://openalex.org/W2132676037", "https://openalex.org/W2140135625", "https://openalex.org/W2153192722", "https://openalex.org/W2161966552", "https://openalex.org/W2901136733", "https://openalex.org/W3011120880", "https://openalex.org/W3020747880", "https://openalex.org/W3023151133", "https://openalex.org/W4214717370", "https://openalex.org/W4242606736", "https://openalex.org/W4285719527", "https://openalex.org/W6636578284", "https://openalex.org/W6675223484", "https://openalex.org/W6677128644", "https://openalex.org/W6679365036", "https://openalex.org/W6680657880", "https://openalex.org/W6682567211", "https://openalex.org/W6683603353", "https://openalex.org/W6756486208"], "related_works": ["https://openalex.org/W2122871747", "https://openalex.org/W3114279067", "https://openalex.org/W2930863966", "https://openalex.org/W2126211886", "https://openalex.org/W3153786280", "https://openalex.org/W3127551068", "https://openalex.org/W4220829754", "https://openalex.org/W2165180011", "https://openalex.org/W3165944253", "https://openalex.org/W2351343564"], "abstract_inverted_index": {"Reinforcement": [0], "Learning": [1], "(RL)": [2], "is": [3, 112], "a": [4, 73, 104], "paradigm": [5], "for": [6, 29, 64, 77, 115], "learning": [7], "decision-making": [8], "tasks": [9, 150], "that": [10, 80, 108, 122, 151], "could": [11], "enable": [12], "robots": [13], "to": [14, 18, 26], "learn": [15, 35, 50], "and": [16, 93, 100, 144], "adapt": [17], "their": [19], "situation": [20], "on-line.": [21], "For": [22], "an": [23, 158], "RL": [24, 48, 79], "algorithm": [25], "be": [27], "practical": [28, 65], "robotic": [30], "control": [31, 118], "tasks,": [32], "it": [33], "must": [34], "in": [36, 44, 51, 82, 103], "very": [37], "few": [38, 53], "samples,": [39, 54], "while": [40], "continually": [41], "taking": [42, 86], "actions": [43, 154], "real-time.": [45], "Existing": [46], "model-based": [47, 78], "methods": [49, 92, 132, 148], "relatively": [52], "but": [55], "typically": [56], "take": [57], "too": [58], "much": [59], "time": [60], "between": [61], "each": [62], "action": [63], "on-line": [66], "learning.": [67], "In": [68], "this": [69, 125], "paper,": [70], "we": [71], "present": [72], "novel": [74, 105], "parallel": [75], "architecture": [76, 126, 137], "runs": [81], "real-time": [83, 153], "by": [84], "1)": [85], "advantage": [87], "of": [88], "sample-based": [89], "approximate": [90], "planning": [91, 101], "2)": [94], "parallelizing": [95], "the": [96, 109, 134], "acting,": [97], "model": [98], "learning,": [99], "processes": [102], "way": [106], "such": [107, 155], "acting": [110], "process": [111], "sufficiently": [113], "fast": [114], "typical": [116, 135], "robot": [117], "cycles.": [119], "We": [120], "demonstrate": [121], "algorithms": [123], "using": [124, 133], "perform": [127], "nearly": [128], "as": [129, 131, 156], "well": [130], "sequential": [136], "when": [138], "both": [139], "are": [140], "given": [141], "unlimited": [142], "time,": [143], "greatly": [145], "out-perform": [146], "these": [147], "on": [149], "require": [152], "controlling": [157], "autonomous": [159], "vehicle.": [160]}, "counts_by_year": [{"year": 2025, "cited_by_count": 8}, {"year": 2023, "cited_by_count": 6}, {"year": 2022, "cited_by_count": 4}, {"year": 2021, "cited_by_count": 8}, {"year": 2020, "cited_by_count": 4}, {"year": 2019, "cited_by_count": 5}, {"year": 2018, "cited_by_count": 5}, {"year": 2017, "cited_by_count": 5}, {"year": 2016, "cited_by_count": 5}, {"year": 2015, "cited_by_count": 7}, {"year": 2014, "cited_by_count": 2}, {"year": 2013, "cited_by_count": 8}, {"year": 2012, "cited_by_count": 3}], "updated_date": "2026-04-05T17:49:38.594831", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3042045548", "doi": "https://doi.org/10.1007/s13042-020-01167-7", "title": "Multi-agent reinforcement learning for redundant robot control in task-space", "display_name": "Multi-agent reinforcement learning for redundant robot control in task-space", "relevance_score": 348.7341, "publication_year": 2020, "publication_date": "2020-07-09", "ids": {"openalex": "https://openalex.org/W3042045548", "doi": "https://doi.org/10.1007/s13042-020-01167-7", "mag": "3042045548"}, "language": "en", "primary_location": {"id": "doi:10.1007/s13042-020-01167-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s13042-020-01167-7", "pdf_url": null, "source": {"id": "https://openalex.org/S2764999920", "display_name": "International Journal of Machine Learning and Cybernetics", "issn_l": "1868-8071", "issn": ["1868-8071", "1868-808X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "International Journal of Machine Learning and Cybernetics", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5065156490", "display_name": "Adolfo Perrusqu\u00eda", "orcid": "https://orcid.org/0000-0003-2290-1160"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Center for Research and Advanced Studies of the National Polytechnic Institute", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": true, "raw_author_name": "Adolfo Perrusqu\u00eda", "raw_affiliation_strings": ["Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5008201587", "display_name": "Wen Yu", "orcid": "https://orcid.org/0000-0002-9540-7924"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Center for Research and Advanced Studies of the National Polytechnic Institute", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": false, "raw_author_name": "Wen Yu", "raw_affiliation_strings": ["Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Control Autom\u00e1tico, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5100683740", "display_name": "Xiaoou Li", "orcid": "https://orcid.org/0000-0003-3087-7375"}, "institutions": [{"id": "https://openalex.org/I68368234", "display_name": "Center for Research and Advanced Studies of the National Polytechnic Institute", "ror": "https://ror.org/009eqmr18", "country_code": "MX", "type": "facility", "lineage": ["https://openalex.org/I59361560", "https://openalex.org/I68368234"]}], "countries": ["MX"], "is_corresponding": false, "raw_author_name": "Xiaoou Li", "raw_affiliation_strings": ["Departamento de Computaci\u00f3n, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Departamento de Computaci\u00f3n, CINVESTAV-IPN (National Polytechnic Institute), Mexico City, Mexico", "institution_ids": ["https://openalex.org/I68368234"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 3, "corresponding_author_ids": ["https://openalex.org/A5065156490"], "corresponding_institution_ids": ["https://openalex.org/I68368234"], "apc_list": {"value": 2790, "currency": "EUR", "value_usd": 3590}, "apc_paid": null, "fwci": 6.2628, "has_fulltext": false, "cited_by_count": 79, "citation_normalized_percentile": {"value": 0.97086309, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 100}, "biblio": {"volume": "12", "issue": "1", "first_page": "231", "last_page": "241"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9965999722480774, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10040", "display_name": "Adaptive Control of Nonlinear Systems", "score": 0.9860000014305115, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/jacobian-matrix-and-determinant", "display_name": "Jacobian matrix and determinant", "score": 0.7715538144111633}, {"id": "https://openalex.org/keywords/inverse-kinematics", "display_name": "Inverse kinematics", "score": 0.7142828702926636}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7079681158065796}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5996822714805603}, {"id": "https://openalex.org/keywords/kinematics", "display_name": "Kinematics", "score": 0.5811013579368591}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.5181699991226196}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.4868899881839752}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4809914827346802}, {"id": "https://openalex.org/keywords/robot-kinematics", "display_name": "Robot kinematics", "score": 0.4620141088962555}, {"id": "https://openalex.org/keywords/degrees-of-freedom", "display_name": "Degrees of freedom (physics and chemistry)", "score": 0.43239620327949524}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.2401578426361084}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.11279842257499695}], "concepts": [{"id": "https://openalex.org/C200331156", "wikidata": "https://www.wikidata.org/wiki/Q506041", "display_name": "Jacobian matrix and determinant", "level": 2, "score": 0.7715538144111633}, {"id": "https://openalex.org/C17816587", "wikidata": "https://www.wikidata.org/wiki/Q1501872", "display_name": "Inverse kinematics", "level": 3, "score": 0.7142828702926636}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7079681158065796}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5996822714805603}, {"id": "https://openalex.org/C39920418", "wikidata": "https://www.wikidata.org/wiki/Q11476", "display_name": "Kinematics", "level": 2, "score": 0.5811013579368591}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.5181699991226196}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.4868899881839752}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4809914827346802}, {"id": "https://openalex.org/C74222875", "wikidata": "https://www.wikidata.org/wiki/Q16000312", "display_name": "Robot kinematics", "level": 4, "score": 0.4620141088962555}, {"id": "https://openalex.org/C208081375", "wikidata": "https://www.wikidata.org/wiki/Q274502", "display_name": "Degrees of freedom (physics and chemistry)", "level": 2, "score": 0.43239620327949524}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.2401578426361084}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.11279842257499695}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C62520636", "wikidata": "https://www.wikidata.org/wiki/Q944", "display_name": "Quantum mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C74650414", "wikidata": "https://www.wikidata.org/wiki/Q11397", "display_name": "Classical mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C28826006", "wikidata": "https://www.wikidata.org/wiki/Q33521", "display_name": "Applied mathematics", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/s13042-020-01167-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s13042-020-01167-7", "pdf_url": null, "source": {"id": "https://openalex.org/S2764999920", "display_name": "International Journal of Machine Learning and Cybernetics", "issn_l": "1868-8071", "issn": ["1868-8071", "1868-808X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "International Journal of Machine Learning and Cybernetics", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 53, "referenced_works": ["https://openalex.org/W206679605", "https://openalex.org/W613314169", "https://openalex.org/W1511864092", "https://openalex.org/W1579240551", "https://openalex.org/W1626155273", "https://openalex.org/W1966086707", "https://openalex.org/W1977655452", "https://openalex.org/W1979638690", "https://openalex.org/W1980417427", "https://openalex.org/W2009563859", "https://openalex.org/W2012587148", "https://openalex.org/W2021004298", "https://openalex.org/W2042882799", "https://openalex.org/W2065253294", "https://openalex.org/W2101745110", "https://openalex.org/W2102623571", "https://openalex.org/W2112566751", "https://openalex.org/W2121863487", "https://openalex.org/W2134140787", "https://openalex.org/W2134159355", "https://openalex.org/W2140135625", "https://openalex.org/W2141543600", "https://openalex.org/W2152161277", "https://openalex.org/W2152445738", "https://openalex.org/W2165131254", "https://openalex.org/W2165150801", "https://openalex.org/W2166513231", "https://openalex.org/W2294330364", "https://openalex.org/W2490314460", "https://openalex.org/W2519894412", "https://openalex.org/W2736601468", "https://openalex.org/W2758033803", "https://openalex.org/W2773735557", "https://openalex.org/W2790154185", "https://openalex.org/W2791291025", "https://openalex.org/W2792718656", "https://openalex.org/W2792799644", "https://openalex.org/W2898563784", "https://openalex.org/W2943867327", "https://openalex.org/W2956146357", "https://openalex.org/W2963252619", "https://openalex.org/W2965916140", "https://openalex.org/W2966272253", "https://openalex.org/W2972280472", "https://openalex.org/W2982651739", "https://openalex.org/W3000371092", "https://openalex.org/W3007973969", "https://openalex.org/W3009594751", "https://openalex.org/W3024744364", "https://openalex.org/W3144155561", "https://openalex.org/W4205513846", "https://openalex.org/W4214717370", "https://openalex.org/W6814003322"], "related_works": ["https://openalex.org/W2115240519", "https://openalex.org/W2080642692", "https://openalex.org/W2039927751", "https://openalex.org/W2744818472", "https://openalex.org/W1253671258", "https://openalex.org/W2731862817", "https://openalex.org/W4283266117", "https://openalex.org/W2355512144", "https://openalex.org/W3134555460", "https://openalex.org/W2607470227"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 3}, {"year": 2025, "cited_by_count": 9}, {"year": 2024, "cited_by_count": 24}, {"year": 2023, "cited_by_count": 10}, {"year": 2022, "cited_by_count": 16}, {"year": 2021, "cited_by_count": 14}, {"year": 2020, "cited_by_count": 3}], "updated_date": "2026-05-09T13:55:54.758798", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3195968524", "doi": "https://doi.org/10.1146/annurev-control-042920-020211", "title": "Safe Learning in Robotics: From Learning-Based Control to Safe Reinforcement Learning", "display_name": "Safe Learning in Robotics: From Learning-Based Control to Safe Reinforcement Learning", "relevance_score": 331.2049, "publication_year": 2022, "publication_date": "2022-01-26", "ids": {"openalex": "https://openalex.org/W3195968524", "doi": "https://doi.org/10.1146/annurev-control-042920-020211", "mag": "3195968524"}, "language": "en", "primary_location": {"id": "doi:10.1146/annurev-control-042920-020211", "is_oa": true, "landing_page_url": "https://doi.org/10.1146/annurev-control-042920-020211", "pdf_url": "https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-042920-020211", "source": {"id": "https://openalex.org/S4210191328", "display_name": "Annual Review of Control Robotics and Autonomous Systems", "issn_l": "2573-5144", "issn": ["2573-5144"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320373", "host_organization_name": "Annual Reviews", "host_organization_lineage": ["https://openalex.org/P4310320373"], "host_organization_lineage_names": ["Annual Reviews"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Review of Control, Robotics, and Autonomous Systems", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "bronze", "oa_url": "https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-042920-020211", "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5011540585", "display_name": "Lukas Brunke", "orcid": "https://orcid.org/0000-0002-9893-9889"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": true, "raw_author_name": "Lukas Brunke", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5019695635", "display_name": "Melissa Greeff", "orcid": "https://orcid.org/0000-0002-1708-3709"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Melissa Greeff", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5030315406", "display_name": "Adam W. Hall", "orcid": "https://orcid.org/0000-0002-1461-923X"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Adam W. Hall", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5006446172", "display_name": "Zhaocong Yuan", "orcid": null}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Zhaocong Yuan", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5048199386", "display_name": "Siqi Zhou", "orcid": "https://orcid.org/0000-0001-7240-546X"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Siqi Zhou", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5038164962", "display_name": "Jacopo Panerati", "orcid": "https://orcid.org/0000-0003-2994-5422"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Jacopo Panerati", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5052147335", "display_name": "Angela P. Schoellig", "orcid": "https://orcid.org/0000-0003-4012-4668"}, "institutions": [{"id": "https://openalex.org/I185261750", "display_name": "University of Toronto", "ror": "https://ror.org/03dbr7087", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I185261750"]}, {"id": "https://openalex.org/I4210127509", "display_name": "Vector Institute", "ror": "https://ror.org/03kqdja62", "country_code": "CA", "type": "facility", "lineage": ["https://openalex.org/I4210127509"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "Angela P. Schoellig", "raw_affiliation_strings": ["Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Institute for Aerospace Studies, University of Toronto, Toronto, Ontario, Canada;, , , , , ,", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "University of Toronto Robotics Institute, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I185261750"]}, {"raw_affiliation_string": "Vector Institute for Artificial Intelligence, Toronto, Ontario, Canada", "institution_ids": ["https://openalex.org/I4210127509"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 7, "corresponding_author_ids": ["https://openalex.org/A5011540585"], "corresponding_institution_ids": ["https://openalex.org/I185261750", "https://openalex.org/I4210127509"], "apc_list": null, "apc_paid": null, "fwci": 69.7484, "has_fulltext": true, "cited_by_count": 647, "citation_normalized_percentile": {"value": 1.0, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "5", "issue": "1", "first_page": "411", "last_page": "444"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10876", "display_name": "Fault Detection and Control Systems", "score": 0.9987000226974487, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10876", "display_name": "Fault Detection and Control Systems", "score": 0.9987000226974487, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9939000010490417, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12423", "display_name": "Software Reliability and Analysis Research", "score": 0.9821000099182129, "subfield": {"id": "https://openalex.org/subfields/1712", "display_name": "Software"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8415877819061279}, {"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.6769211292266846}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6600054502487183}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5778108835220337}, {"id": "https://openalex.org/keywords/leverage", "display_name": "Leverage (statistics)", "score": 0.5203349590301514}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.49297666549682617}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.49101710319519043}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.4623512029647827}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.1768949031829834}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8415877819061279}, {"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.6769211292266846}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6600054502487183}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5778108835220337}, {"id": "https://openalex.org/C153083717", "wikidata": "https://www.wikidata.org/wiki/Q6535263", "display_name": "Leverage (statistics)", "level": 2, "score": 0.5203349590301514}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.49297666549682617}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.49101710319519043}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.4623512029647827}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.1768949031829834}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1146/annurev-control-042920-020211", "is_oa": true, "landing_page_url": "https://doi.org/10.1146/annurev-control-042920-020211", "pdf_url": "https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-042920-020211", "source": {"id": "https://openalex.org/S4210191328", "display_name": "Annual Review of Control Robotics and Autonomous Systems", "issn_l": "2573-5144", "issn": ["2573-5144"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320373", "host_organization_name": "Annual Reviews", "host_organization_lineage": ["https://openalex.org/P4310320373"], "host_organization_lineage_names": ["Annual Reviews"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Review of Control, Robotics, and Autonomous Systems", "raw_type": "journal-article"}], "best_oa_location": {"id": "doi:10.1146/annurev-control-042920-020211", "is_oa": true, "landing_page_url": "https://doi.org/10.1146/annurev-control-042920-020211", "pdf_url": "https://www.annualreviews.org/doi/pdf/10.1146/annurev-control-042920-020211", "source": {"id": "https://openalex.org/S4210191328", "display_name": "Annual Review of Control Robotics and Autonomous Systems", "issn_l": "2573-5144", "issn": ["2573-5144"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320373", "host_organization_name": "Annual Reviews", "host_organization_lineage": ["https://openalex.org/P4310320373"], "host_organization_lineage_names": ["Annual Reviews"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Annual Review of Control, Robotics, and Autonomous Systems", "raw_type": "journal-article"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.800000011920929, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [{"id": "https://openalex.org/F4320309949", "display_name": "Canadian Institute for Advanced Research", "ror": "https://ror.org/01sdtdd95"}, {"id": "https://openalex.org/F4320320994", "display_name": "Canada Research Chairs", "ror": "https://ror.org/0517h6h17"}, {"id": "https://openalex.org/F4320334593", "display_name": "Natural Sciences and Engineering Research Council of Canada", "ror": "https://ror.org/01h531d29"}], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W3195968524.pdf", "grobid_xml": "https://content.openalex.org/works/W3195968524.grobid-xml"}, "referenced_works_count": 182, "referenced_works": ["https://openalex.org/W126555446", "https://openalex.org/W130083205", "https://openalex.org/W1518931405", "https://openalex.org/W1522399516", "https://openalex.org/W1530960252", "https://openalex.org/W1710476689", "https://openalex.org/W1771410628", "https://openalex.org/W1845972764", "https://openalex.org/W1963790880", "https://openalex.org/W1965878388", "https://openalex.org/W1977655452", "https://openalex.org/W1981723834", "https://openalex.org/W1997543377", "https://openalex.org/W2006859604", "https://openalex.org/W2038798729", "https://openalex.org/W2041242313", "https://openalex.org/W2047993072", "https://openalex.org/W2065313971", "https://openalex.org/W2085330455", "https://openalex.org/W2089551119", "https://openalex.org/W2091219706", "https://openalex.org/W2100134099", "https://openalex.org/W2101494153", "https://openalex.org/W2105078254", "https://openalex.org/W2116364955", "https://openalex.org/W2120034667", "https://openalex.org/W2121863487", "https://openalex.org/W2137002893", "https://openalex.org/W2143346970", "https://openalex.org/W2145339207", "https://openalex.org/W2149250004", "https://openalex.org/W2151237105", "https://openalex.org/W2159038290", "https://openalex.org/W2184993170", "https://openalex.org/W2236244207", "https://openalex.org/W2251588794", "https://openalex.org/W2396317032", "https://openalex.org/W2491934829", "https://openalex.org/W2529477964", "https://openalex.org/W2559655401", "https://openalex.org/W2580909119", "https://openalex.org/W2586067474", "https://openalex.org/W2586823359", "https://openalex.org/W2591892707", "https://openalex.org/W2602963933", "https://openalex.org/W2607662938", "https://openalex.org/W2612690371", "https://openalex.org/W2618318883", "https://openalex.org/W2619551236", "https://openalex.org/W2736601468", "https://openalex.org/W2767149636", "https://openalex.org/W2768908787", "https://openalex.org/W2772589676", "https://openalex.org/W2784465508", "https://openalex.org/W2788014517", "https://openalex.org/W2788084076", "https://openalex.org/W2791704483", "https://openalex.org/W2804190906", "https://openalex.org/W2806985155", "https://openalex.org/W2822752092", "https://openalex.org/W2842089854", "https://openalex.org/W2885381174", "https://openalex.org/W2889711700", "https://openalex.org/W2892521964", "https://openalex.org/W2893059165", "https://openalex.org/W2900806034", "https://openalex.org/W2901402208", "https://openalex.org/W2910221532", "https://openalex.org/W2913300629", "https://openalex.org/W2913668833", "https://openalex.org/W2918383326", "https://openalex.org/W2919264892", "https://openalex.org/W2922275330", "https://openalex.org/W2924156739", "https://openalex.org/W2925329277", "https://openalex.org/W2928153079", "https://openalex.org/W2938834661", "https://openalex.org/W2942608247", "https://openalex.org/W2946396478", "https://openalex.org/W2949482052", "https://openalex.org/W2949608212", "https://openalex.org/W2962775887", "https://openalex.org/W2962803570", "https://openalex.org/W2962957005", "https://openalex.org/W2963082979", "https://openalex.org/W2963120839", "https://openalex.org/W2963148914", "https://openalex.org/W2963293747", "https://openalex.org/W2963423916", "https://openalex.org/W2963468788", "https://openalex.org/W2963525569", "https://openalex.org/W2963683522", "https://openalex.org/W2963809569", "https://openalex.org/W2963958573", "https://openalex.org/W2963960193", "https://openalex.org/W2963999210", "https://openalex.org/W2964044315", "https://openalex.org/W2964340170", "https://openalex.org/W2966735560", "https://openalex.org/W2967292964", "https://openalex.org/W2968104655", "https://openalex.org/W2968428478", "https://openalex.org/W2968547875", "https://openalex.org/W2968945909", "https://openalex.org/W2973076431", "https://openalex.org/W2975197937", "https://openalex.org/W2977676795", "https://openalex.org/W2979251496", "https://openalex.org/W2982095652", "https://openalex.org/W2983431491", "https://openalex.org/W2992833799", "https://openalex.org/W2994446013", "https://openalex.org/W2997273686", "https://openalex.org/W2997896361", "https://openalex.org/W3004316249", "https://openalex.org/W3009331570", "https://openalex.org/W3010871414", "https://openalex.org/W3010970557", "https://openalex.org/W3011042361", "https://openalex.org/W3011575162", "https://openalex.org/W3011769666", "https://openalex.org/W3013146177", "https://openalex.org/W3025460448", "https://openalex.org/W3031427716", "https://openalex.org/W3033003934", "https://openalex.org/W3033324992", "https://openalex.org/W3035611392", "https://openalex.org/W3037207827", "https://openalex.org/W3038180127", "https://openalex.org/W3039984786", "https://openalex.org/W3043502726", "https://openalex.org/W3046925685", "https://openalex.org/W3047166264", "https://openalex.org/W3085436547", "https://openalex.org/W3089427344", "https://openalex.org/W3091444389", "https://openalex.org/W3094117125", "https://openalex.org/W3095135648", "https://openalex.org/W3095964404", "https://openalex.org/W3096360491", "https://openalex.org/W3096539512", "https://openalex.org/W3098870475", "https://openalex.org/W3099352109", "https://openalex.org/W3099548126", "https://openalex.org/W3100789280", "https://openalex.org/W3102552342", "https://openalex.org/W3105252106", "https://openalex.org/W3106002372", "https://openalex.org/W3106152188", "https://openalex.org/W3109723275", "https://openalex.org/W3110392270", "https://openalex.org/W3112804989", "https://openalex.org/W3120459386", "https://openalex.org/W3123298421", "https://openalex.org/W3125460529", "https://openalex.org/W3125783951", "https://openalex.org/W3129049248", "https://openalex.org/W3129503641", "https://openalex.org/W3130843035", "https://openalex.org/W3132741523", "https://openalex.org/W3134537774", "https://openalex.org/W3142005321", "https://openalex.org/W3143034939", "https://openalex.org/W3150718622", "https://openalex.org/W3163225369", "https://openalex.org/W3165692195", "https://openalex.org/W3175352502", "https://openalex.org/W3185425515", "https://openalex.org/W3186446230", "https://openalex.org/W3187239961", "https://openalex.org/W3195133498", "https://openalex.org/W3206080182", "https://openalex.org/W3207727352", "https://openalex.org/W3213719174", "https://openalex.org/W4200438872", "https://openalex.org/W4212774754", "https://openalex.org/W4214717370", "https://openalex.org/W4231017914", "https://openalex.org/W4285719527", "https://openalex.org/W4293370597", "https://openalex.org/W4299828299", "https://openalex.org/W6761443123"], "related_works": ["https://openalex.org/W2768698792", "https://openalex.org/W2930863966", "https://openalex.org/W2126211886", "https://openalex.org/W3153786280", "https://openalex.org/W3127551068", "https://openalex.org/W2643884694", "https://openalex.org/W2165180011", "https://openalex.org/W4220829754", "https://openalex.org/W3165944253", "https://openalex.org/W2351343564"], "abstract_inverted_index": {"The": [0], "last": [1], "half": [2], "decade": [3], "has": [4], "seen": [5], "a": [6, 33, 54, 101], "steep": [7], "rise": [8], "in": [9, 43, 63, 126, 137, 156], "the": [10, 24, 39, 58, 81, 98, 145, 151, 157, 162], "number": [11], "of": [12, 38, 100, 144, 153], "contributions": [13], "on": [14, 56], "safe": [15, 49], "learning": [16, 28, 46, 68, 80, 85, 155, 176], "methods": [17, 93, 111], "for": [18, 164], "real-world": [19, 127], "robotic": [20], "deployments": [21], "from": [22], "both": [23], "control": [25, 64, 73, 103, 110, 173], "and": [26, 60, 66, 92, 107, 120, 160, 174], "reinforcement": [27, 67, 84, 175], "communities.": [29], "This": [30], "article": [31], "provides": [32], "concise": [34], "but": [35], "holistic": [36], "review": [37], "recent": [40], "advances": [41], "made": [42], "using": [44], "machine": [45], "to": [47, 113, 122, 168], "achieve": [48], "decision-making": [50], "under": [51], "uncertainties,": [52], "with": [53], "focus": [55], "unifying": [57], "language": [59], "frameworks": [61], "used": [62], "theory": [65], "research.": [69], "It": [70], "includes": [71], "learning-based": [72, 108], "approaches": [74, 86], "that": [75, 87, 94, 148], "safely": [76], "improve": [77], "performance": [78], "by": [79], "uncertain": [82], "dynamics,": [83], "encourage": [88], "safety": [89, 99, 130], "or": [90], "robustness,": [91], "can": [95], "formally": [96], "certify": [97], "learned": [102], "policy.": [104], "As": [105], "data-": [106], "robot": [109, 154], "continue": [112], "gain": [114], "traction,": [115], "researchers": [116], "must": [117], "understand": [118], "when": [119, 135], "how": [121], "best": [123], "leverage": [124], "them": [125], "scenarios": [128], "where": [129], "is": [131], "imperative,": [132], "such": [133], "as": [134], "operating": [136], "close": [138], "proximityto": [139], "humans.": [140], "We": [141], "highlight": [142], "some": [143], "open": [146], "challenges": [147], "will": [149], "drive": [150], "field": [152], "coming": [158], "years,": [159], "emphasize": [161], "need": [163], "realistic": [165], "physics-based": [166], "benchmarks": [167], "facilitate": [169], "fair": [170], "comparisons": [171], "between": [172], "approaches.": [177]}, "counts_by_year": [{"year": 2026, "cited_by_count": 61}, {"year": 2025, "cited_by_count": 181}, {"year": 2024, "cited_by_count": 198}, {"year": 2023, "cited_by_count": 148}, {"year": 2022, "cited_by_count": 48}, {"year": 2021, "cited_by_count": 11}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2904746163", "doi": "https://doi.org/10.1109/icra.2019.8794127", "title": "Residual Reinforcement Learning for Robot Control", "display_name": "Residual Reinforcement Learning for Robot Control", "relevance_score": 293.58395, "publication_year": 2019, "publication_date": "2019-05-01", "ids": {"openalex": "https://openalex.org/W2904746163", "doi": "https://doi.org/10.1109/icra.2019.8794127", "mag": "2904746163"}, "language": "en", "primary_location": {"id": "doi:10.1109/icra.2019.8794127", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2019.8794127", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2019 International Conference on Robotics and Automation (ICRA)", "raw_type": "proceedings-article"}, "type": "preprint", "indexed_in": ["arxiv", "crossref", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1812.03201", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5072927258", "display_name": "Tobias Johannink", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}, {"id": "https://openalex.org/I159176309", "display_name": "Universit\u00e4t Hamburg", "ror": "https://ror.org/00g30e956", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I159176309"]}, {"id": "https://openalex.org/I884043246", "display_name": "Hamburg University of Technology", "ror": "https://ror.org/04bs1pb34", "country_code": "DE", "type": "education", "lineage": ["https://openalex.org/I884043246"]}], "countries": ["DE"], "is_corresponding": true, "raw_author_name": "Tobias Johannink", "raw_affiliation_strings": ["Hamburg University of Technology", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "Hamburg University of Technology", "institution_ids": ["https://openalex.org/I159176309", "https://openalex.org/I884043246"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5090307838", "display_name": "Shikhar Bahl", "orcid": null}, "institutions": [{"id": "https://openalex.org/I95457486", "display_name": "University of California, Berkeley", "ror": "https://ror.org/01an7q238", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I95457486"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Shikhar Bahl", "raw_affiliation_strings": ["University of California, Berkeley", "University of California\u2013Berkeley"], "affiliations": [{"raw_affiliation_string": "University of California, Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}, {"raw_affiliation_string": "University of California\u2013Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5021472921", "display_name": "Ashvin Nair", "orcid": null}, "institutions": [{"id": "https://openalex.org/I95457486", "display_name": "University of California, Berkeley", "ror": "https://ror.org/01an7q238", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I95457486"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Ashvin Nair", "raw_affiliation_strings": ["University of California, Berkeley", "University of California\u2013Berkeley"], "affiliations": [{"raw_affiliation_string": "University of California, Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}, {"raw_affiliation_string": "University of California\u2013Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5037464692", "display_name": "Jianlan Luo", "orcid": "https://orcid.org/0009-0008-8029-7794"}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}, {"id": "https://openalex.org/I95457486", "display_name": "University of California, Berkeley", "ror": "https://ror.org/01an7q238", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I95457486"]}], "countries": ["DE", "US"], "is_corresponding": false, "raw_author_name": "Jianlan Luo", "raw_affiliation_strings": ["University of California, Berkeley", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "University of California, Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5102024670", "display_name": "Avinash Kumar", "orcid": "https://orcid.org/0000-0001-9021-1343"}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Avinash Kumar", "raw_affiliation_strings": ["Siemens Corporation", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "Siemens Corporation", "institution_ids": ["https://openalex.org/I1325886976"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5007532782", "display_name": "Matthias Loskyll", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Matthias Loskyll", "raw_affiliation_strings": ["Siemens Corporation", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "Siemens Corporation", "institution_ids": ["https://openalex.org/I1325886976"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5038472921", "display_name": "Juan Aparicio Ojea", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Juan Aparicio Ojea", "raw_affiliation_strings": ["Siemens Corporation", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "Siemens Corporation", "institution_ids": ["https://openalex.org/I1325886976"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5043873756", "display_name": "Eugen Solowjow", "orcid": "https://orcid.org/0000-0001-5222-3706"}, "institutions": [{"id": "https://openalex.org/I1325886976", "display_name": "Siemens (Germany)", "ror": "https://ror.org/059mq0909", "country_code": "DE", "type": "company", "lineage": ["https://openalex.org/I1325886976"]}], "countries": ["DE"], "is_corresponding": false, "raw_author_name": "Eugen Solowjow", "raw_affiliation_strings": ["Siemens Corporation", "SIEMENS CORPORATION"], "affiliations": [{"raw_affiliation_string": "Siemens Corporation", "institution_ids": ["https://openalex.org/I1325886976"]}, {"raw_affiliation_string": "SIEMENS CORPORATION", "institution_ids": ["https://openalex.org/I1325886976"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [{"id": "https://openalex.org/I95457486", "display_name": "University of California, Berkeley", "ror": "https://ror.org/01an7q238", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I95457486"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Sergey Levine", "raw_affiliation_strings": ["University of California, Berkeley", "University of California\u2013Berkeley"], "affiliations": [{"raw_affiliation_string": "University of California, Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}, {"raw_affiliation_string": "University of California\u2013Berkeley", "institution_ids": ["https://openalex.org/I95457486"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 9, "corresponding_author_ids": ["https://openalex.org/A5072927258"], "corresponding_institution_ids": ["https://openalex.org/I1325886976", "https://openalex.org/I159176309", "https://openalex.org/I884043246"], "apc_list": null, "apc_paid": null, "fwci": 4.5485, "has_fulltext": true, "cited_by_count": 45, "citation_normalized_percentile": {"value": 0.94920764, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": "6023", "last_page": "6029"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9958999752998352, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11023", "display_name": "Prosthetics and Rehabilitation Robotics", "score": 0.948199987411499, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7731482982635498}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6219316124916077}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5758423805236816}, {"id": "https://openalex.org/keywords/residual", "display_name": "Residual", "score": 0.5392751693725586}, {"id": "https://openalex.org/keywords/block", "display_name": "Block (permutation group theory)", "score": 0.5209875702857971}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.5024051666259766}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49407151341438293}, {"id": "https://openalex.org/keywords/superposition-principle", "display_name": "Superposition principle", "score": 0.48952898383140564}, {"id": "https://openalex.org/keywords/motion-control", "display_name": "Motion control", "score": 0.44923266768455505}, {"id": "https://openalex.org/keywords/control-theory", "display_name": "Control theory (sociology)", "score": 0.44504427909851074}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.3782493770122528}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.2425406575202942}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.1406688392162323}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.11022016406059265}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7731482982635498}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6219316124916077}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5758423805236816}, {"id": "https://openalex.org/C155512373", "wikidata": "https://www.wikidata.org/wiki/Q287450", "display_name": "Residual", "level": 2, "score": 0.5392751693725586}, {"id": "https://openalex.org/C2777210771", "wikidata": "https://www.wikidata.org/wiki/Q4927124", "display_name": "Block (permutation group theory)", "level": 2, "score": 0.5209875702857971}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.5024051666259766}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49407151341438293}, {"id": "https://openalex.org/C27753989", "wikidata": "https://www.wikidata.org/wiki/Q284885", "display_name": "Superposition principle", "level": 2, "score": 0.48952898383140564}, {"id": "https://openalex.org/C145565327", "wikidata": "https://www.wikidata.org/wiki/Q852514", "display_name": "Motion control", "level": 3, "score": 0.44923266768455505}, {"id": "https://openalex.org/C47446073", "wikidata": "https://www.wikidata.org/wiki/Q5165890", "display_name": "Control theory (sociology)", "level": 3, "score": 0.44504427909851074}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.3782493770122528}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.2425406575202942}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.1406688392162323}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.11022016406059265}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C2524010", "wikidata": "https://www.wikidata.org/wiki/Q8087", "display_name": "Geometry", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 5, "locations": [{"id": "doi:10.1109/icra.2019.8794127", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/icra.2019.8794127", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "2019 International Conference on Robotics and Automation (ICRA)", "raw_type": "proceedings-article"}, {"id": "pmh:oai:arXiv.org:1812.03201", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.03201", "pdf_url": "https://arxiv.org/pdf/1812.03201", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "mag:2904746163", "is_oa": true, "landing_page_url": "http://export.arxiv.org/pdf/1812.03201", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "arXiv (Cornell University)", "raw_type": null}, {"id": "pmh:oai:tore.tuhh.de:11420/10865", "is_oa": false, "landing_page_url": "http://hdl.handle.net/11420/10865", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401751", "display_name": "tub.dok (Hamburg University of Technology)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I884043246", "host_organization_name": "Hamburg University of Technology", "host_organization_lineage": ["https://openalex.org/I884043246"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "Conference Paper"}, {"id": "doi:10.48550/arxiv.1812.03201", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1812.03201", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "article"}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1812.03201", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1812.03201", "pdf_url": "https://arxiv.org/pdf/1812.03201", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320307782", "display_name": "Siemens USA", "ror": "https://ror.org/04axb7e79"}, {"id": "https://openalex.org/F4320337345", "display_name": "Office of Naval Research", "ror": "https://ror.org/00rk2pe57"}], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2904746163.pdf", "grobid_xml": "https://content.openalex.org/works/W2904746163.grobid-xml"}, "referenced_works_count": 47, "referenced_works": ["https://openalex.org/W218896052", "https://openalex.org/W745775011", "https://openalex.org/W1777239053", "https://openalex.org/W2098152875", "https://openalex.org/W2125612430", "https://openalex.org/W2140135625", "https://openalex.org/W2158782408", "https://openalex.org/W2167117957", "https://openalex.org/W2194775991", "https://openalex.org/W2415726935", "https://openalex.org/W2564090091", "https://openalex.org/W2575705757", "https://openalex.org/W2607198029", "https://openalex.org/W2626804490", "https://openalex.org/W2741122588", "https://openalex.org/W2781726626", "https://openalex.org/W2792811825", "https://openalex.org/W2794949442", "https://openalex.org/W2905364877", "https://openalex.org/W2962736495", "https://openalex.org/W2963099939", "https://openalex.org/W2963411833", "https://openalex.org/W2963864421", "https://openalex.org/W2963973314", "https://openalex.org/W2964036701", "https://openalex.org/W2964161785", "https://openalex.org/W2964248288", "https://openalex.org/W2964333597", "https://openalex.org/W6622050690", "https://openalex.org/W6629881138", "https://openalex.org/W6631533588", "https://openalex.org/W6638088447", "https://openalex.org/W6653435097", "https://openalex.org/W6680657880", "https://openalex.org/W6682849425", "https://openalex.org/W6683397050", "https://openalex.org/W6684921986", "https://openalex.org/W6697071109", "https://openalex.org/W6716193690", "https://openalex.org/W6735677848", "https://openalex.org/W6736609842", "https://openalex.org/W6737327832", "https://openalex.org/W6739585900", "https://openalex.org/W6747473740", "https://openalex.org/W6748012927", "https://openalex.org/W6748643490", "https://openalex.org/W6748839928"], "related_works": ["https://openalex.org/W2967727187", "https://openalex.org/W2905364877", "https://openalex.org/W2885163910", "https://openalex.org/W2736601468", "https://openalex.org/W2155027007", "https://openalex.org/W166135387", "https://openalex.org/W2921000986", "https://openalex.org/W3203023912", "https://openalex.org/W2300668509", "https://openalex.org/W2737488931", "https://openalex.org/W2032845773", "https://openalex.org/W2890728188", "https://openalex.org/W3170706013", "https://openalex.org/W3089337217", "https://openalex.org/W2610504928", "https://openalex.org/W3013429277", "https://openalex.org/W3120428840", "https://openalex.org/W2897428232", "https://openalex.org/W2298870990", "https://openalex.org/W3044718039"], "abstract_inverted_index": {"Conventional": [0], "feedback": [1, 130], "control": [2, 10, 30, 51, 112, 131, 143, 150], "methods": [3, 77], "can": [4, 109], "solve": [5, 110], "various": [6], "types": [7], "of": [8, 26, 57, 84, 148], "robot": [9, 87], "problems": [11, 31, 58, 96, 113], "very": [12], "efficiently": [13, 127], "by": [14, 118, 128, 156], "capturing": [15], "the": [16, 92, 115, 134], "structure": [17], "with": [18, 36, 45, 91, 139], "explicit": [19], "models,": [20], "such": [21], "as": [22], "rigid": [23], "body": [24], "equations": [25], "motion.": [27], "However,": [28], "many": [29], "in": [32, 61, 114], "modern": [33], "manufacturing": [34], "deal": [35], "contacts": [37, 169], "and": [38, 63, 100, 133, 170], "friction,": [39], "which": [40, 66, 136], "are": [41], "difficult": [42, 111], "to": [43, 54, 68, 81, 160], "capture": [44], "first-order": [46], "physical": [47], "modeling.": [48], "Hence,": [49], "applying": [50], "design": [52], "methodologies": [53], "these": [55], "kinds": [56], "often": [59], "results": [60], "brittle": [62], "inaccurate": [64], "controllers,": [65], "have": [67, 78], "be": [69, 82], "manually": [70], "tuned": [71], "for": [72, 95], "deployment.": [73], "Reinforcement": [74], "learning": [75, 85], "(RL)": [76], "been": [79], "demonstrated": [80], "capable": [83], "continuous": [86], "controllers": [88], "from": [89], "interactions": [90], "environment,": [93], "even": [94], "that": [97, 124], "include": [98], "friction": [99], "contacts.": [101], "In": [102], "this": [103], "paper,": [104], "we": [105, 108], "study": [106], "how": [107], "real": [116], "world": [117], "decomposing": [119], "them": [120], "into": [121], "a": [122, 146, 163], "part": [123], "is": [125, 137, 145], "solved": [126, 138], "conventional": [129], "methods,": [132], "residual": [135], "RL.": [140], "The": [141], "final": [142], "policy": [144], "superposition": [147], "both": [149], "signals.": [151], "We": [152], "demonstrate": [153], "our": [154], "approach": [155], "training": [157], "an": [158], "agent": [159], "successfully": [161], "perform": [162], "real-world": [164], "block": [165], "assembly": [166], "task": [167], "involving": [168], "unstable": [171], "objects.": [172]}, "counts_by_year": [{"year": 2025, "cited_by_count": 3}, {"year": 2024, "cited_by_count": 3}, {"year": 2023, "cited_by_count": 11}, {"year": 2022, "cited_by_count": 9}, {"year": 2021, "cited_by_count": 8}, {"year": 2020, "cited_by_count": 4}, {"year": 2019, "cited_by_count": 6}, {"year": 2018, "cited_by_count": 1}], "updated_date": "2026-04-17T18:11:37.981687", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2109090232", "doi": "https://doi.org/10.1016/b978-1-55860-307-3.50030-7", "title": "Scaling Up Reinforcement Learning for Robot Control", "display_name": "Scaling Up Reinforcement Learning for Robot Control", "relevance_score": 275.89603, "publication_year": 1993, "publication_date": "1993-01-01", "ids": {"openalex": "https://openalex.org/W2109090232", "doi": "https://doi.org/10.1016/b978-1-55860-307-3.50030-7", "mag": "2109090232"}, "language": "en", "primary_location": {"id": "doi:10.1016/b978-1-55860-307-3.50030-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/b978-1-55860-307-3.50030-7", "pdf_url": null, "source": {"id": "https://openalex.org/S4306463230", "display_name": "Elsevier eBooks", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "ebook platform"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Machine Learning Proceedings 1993", "raw_type": "book-chapter"}, "type": "book-chapter", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5110204281", "display_name": "Long-Ji Lin", "orcid": null}, "institutions": [{"id": "https://openalex.org/I74973139", "display_name": "Carnegie Mellon University", "ror": "https://ror.org/05x2bcf33", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I74973139"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "Long-Ji Lin", "raw_affiliation_strings": ["School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213"], "affiliations": [{"raw_affiliation_string": "School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213", "institution_ids": ["https://openalex.org/I74973139"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5110204281"], "corresponding_institution_ids": ["https://openalex.org/I74973139"], "apc_list": null, "apc_paid": null, "fwci": 8.1727, "has_fulltext": false, "cited_by_count": 83, "citation_normalized_percentile": {"value": 0.98088704, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 98}, "biblio": {"volume": null, "issue": null, "first_page": "182", "last_page": "189"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9991000294685364, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9991000294685364, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9948999881744385, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10320", "display_name": "Neural Networks and Applications", "score": 0.9819999933242798, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.9099692106246948}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6926379203796387}, {"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.6890839338302612}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6688024997711182}, {"id": "https://openalex.org/keywords/generalization", "display_name": "Generalization", "score": 0.6640235185623169}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.576328456401825}, {"id": "https://openalex.org/keywords/speedup", "display_name": "Speedup", "score": 0.5256867408752441}, {"id": "https://openalex.org/keywords/learning-classifier-system", "display_name": "Learning classifier system", "score": 0.4769688546657562}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.4372811019420624}, {"id": "https://openalex.org/keywords/action-learning", "display_name": "Action learning", "score": 0.422453910112381}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.41367167234420776}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.20016056299209595}, {"id": "https://openalex.org/keywords/cooperative-learning", "display_name": "Cooperative learning", "score": 0.13780391216278076}, {"id": "https://openalex.org/keywords/psychology", "display_name": "Psychology", "score": 0.058804094791412354}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.056831419467926025}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.9099692106246948}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6926379203796387}, {"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.6890839338302612}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6688024997711182}, {"id": "https://openalex.org/C177148314", "wikidata": "https://www.wikidata.org/wiki/Q170084", "display_name": "Generalization", "level": 2, "score": 0.6640235185623169}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.576328456401825}, {"id": "https://openalex.org/C68339613", "wikidata": "https://www.wikidata.org/wiki/Q1549489", "display_name": "Speedup", "level": 2, "score": 0.5256867408752441}, {"id": "https://openalex.org/C199190896", "wikidata": "https://www.wikidata.org/wiki/Q3509276", "display_name": "Learning classifier system", "level": 3, "score": 0.4769688546657562}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.4372811019420624}, {"id": "https://openalex.org/C183759332", "wikidata": "https://www.wikidata.org/wiki/Q343680", "display_name": "Action learning", "level": 4, "score": 0.422453910112381}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.41367167234420776}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.20016056299209595}, {"id": "https://openalex.org/C51672120", "wikidata": "https://www.wikidata.org/wiki/Q303446", "display_name": "Cooperative learning", "level": 3, "score": 0.13780391216278076}, {"id": "https://openalex.org/C15744967", "wikidata": "https://www.wikidata.org/wiki/Q9418", "display_name": "Psychology", "level": 0, "score": 0.058804094791412354}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.056831419467926025}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C88610354", "wikidata": "https://www.wikidata.org/wiki/Q1813494", "display_name": "Teaching method", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C19417346", "wikidata": "https://www.wikidata.org/wiki/Q7922", "display_name": "Pedagogy", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1016/b978-1-55860-307-3.50030-7", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/b978-1-55860-307-3.50030-7", "pdf_url": null, "source": {"id": "https://openalex.org/S4306463230", "display_name": "Elsevier eBooks", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "ebook platform"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Machine Learning Proceedings 1993", "raw_type": "book-chapter"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 17, "referenced_works": ["https://openalex.org/W1491843047", "https://openalex.org/W1499371387", "https://openalex.org/W1588388339", "https://openalex.org/W1595483645", "https://openalex.org/W2012036715", "https://openalex.org/W2048226872", "https://openalex.org/W2052117683", "https://openalex.org/W2100677568", "https://openalex.org/W2101767921", "https://openalex.org/W2110485445", "https://openalex.org/W2112483970", "https://openalex.org/W2124817418", "https://openalex.org/W2141559645", "https://openalex.org/W2156245846", "https://openalex.org/W3011120880", "https://openalex.org/W3041202696", "https://openalex.org/W4253365321"], "related_works": ["https://openalex.org/W1976483367", "https://openalex.org/W2693300019", "https://openalex.org/W2097725510", "https://openalex.org/W1534851618", "https://openalex.org/W3022038857", "https://openalex.org/W2122027938", "https://openalex.org/W2393452615", "https://openalex.org/W2539786292", "https://openalex.org/W1595483645", "https://openalex.org/W4213341750"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2022, "cited_by_count": 1}, {"year": 2021, "cited_by_count": 3}, {"year": 2019, "cited_by_count": 4}, {"year": 2017, "cited_by_count": 1}, {"year": 2016, "cited_by_count": 2}, {"year": 2013, "cited_by_count": 1}], "updated_date": "2025-11-06T03:46:38.306776", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2262174858", "doi": null, "title": "Apprenticeship learning and reinforcement learning with application to robotic control", "display_name": "Apprenticeship learning and reinforcement learning with application to robotic control", "relevance_score": 263.10284, "publication_year": 2008, "publication_date": "2008-01-01", "ids": {"openalex": "https://openalex.org/W2262174858", "mag": "2262174858"}, "language": "en", "primary_location": {"id": "pmh:oai:pqdtoai.proquest.com:3332983", "is_oa": false, "landing_page_url": "http://pqdtopen.proquest.com/#viewpdf?dispub=3332983", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "thesis"}, "type": "article", "indexed_in": [], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5049349154", "display_name": "Pieter Abbeel", "orcid": null}, "institutions": [{"id": "https://openalex.org/I97018004", "display_name": "Stanford University", "ror": "https://ror.org/00f54p054", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I97018004"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "Pieter Abbeel", "raw_affiliation_strings": ["Stanford University ()"], "affiliations": [{"raw_affiliation_string": "Stanford University ()", "institution_ids": ["https://openalex.org/I97018004"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 1, "corresponding_author_ids": ["https://openalex.org/A5049349154"], "corresponding_institution_ids": ["https://openalex.org/I97018004"], "apc_list": null, "apc_paid": null, "fwci": 8.5808, "has_fulltext": false, "cited_by_count": 72, "citation_normalized_percentile": {"value": 0.97518362, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9905999898910522, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9886000156402588, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7923595905303955}, {"id": "https://openalex.org/keywords/traverse", "display_name": "Traverse", "score": 0.6694361567497253}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.668908953666687}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6501655578613281}, {"id": "https://openalex.org/keywords/task", "display_name": "Task (project management)", "score": 0.6449666023254395}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6349065899848938}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5135860443115234}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.4686512351036072}, {"id": "https://openalex.org/keywords/apprenticeship", "display_name": "Apprenticeship", "score": 0.4479144811630249}, {"id": "https://openalex.org/keywords/unobservable", "display_name": "Unobservable", "score": 0.44443386793136597}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.43831461668014526}, {"id": "https://openalex.org/keywords/terrain", "display_name": "Terrain", "score": 0.42798030376434326}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.196092426776886}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.10514238476753235}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7923595905303955}, {"id": "https://openalex.org/C176809094", "wikidata": "https://www.wikidata.org/wiki/Q15401496", "display_name": "Traverse", "level": 2, "score": 0.6694361567497253}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.668908953666687}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6501655578613281}, {"id": "https://openalex.org/C2780451532", "wikidata": "https://www.wikidata.org/wiki/Q759676", "display_name": "Task (project management)", "level": 2, "score": 0.6449666023254395}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6349065899848938}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5135860443115234}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.4686512351036072}, {"id": "https://openalex.org/C107806365", "wikidata": "https://www.wikidata.org/wiki/Q253567", "display_name": "Apprenticeship", "level": 2, "score": 0.4479144811630249}, {"id": "https://openalex.org/C2780695315", "wikidata": "https://www.wikidata.org/wiki/Q3799040", "display_name": "Unobservable", "level": 2, "score": 0.44443386793136597}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.43831461668014526}, {"id": "https://openalex.org/C161840515", "wikidata": "https://www.wikidata.org/wiki/Q186131", "display_name": "Terrain", "level": 2, "score": 0.42798030376434326}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.196092426776886}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.10514238476753235}, {"id": "https://openalex.org/C13280743", "wikidata": "https://www.wikidata.org/wiki/Q131089", "display_name": "Geodesy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C205649164", "wikidata": "https://www.wikidata.org/wiki/Q1071", "display_name": "Geography", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C18903297", "wikidata": "https://www.wikidata.org/wiki/Q7150", "display_name": "Ecology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C201995342", "wikidata": "https://www.wikidata.org/wiki/Q682496", "display_name": "Systems engineering", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C149782125", "wikidata": "https://www.wikidata.org/wiki/Q160039", "display_name": "Econometrics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C41895202", "wikidata": "https://www.wikidata.org/wiki/Q8162", "display_name": "Linguistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "pmh:oai:pqdtoai.proquest.com:3332983", "is_oa": false, "landing_page_url": "http://pqdtopen.proquest.com/#viewpdf?dispub=3332983", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "thesis"}, {"id": "mag:2262174858", "is_oa": false, "landing_page_url": "https://dl.acm.org/citation.cfm?id=1571528", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 77, "referenced_works": ["https://openalex.org/W177093312", "https://openalex.org/W246617311", "https://openalex.org/W304861154", "https://openalex.org/W658381347", "https://openalex.org/W1495343868", "https://openalex.org/W1499408472", "https://openalex.org/W1511887321", "https://openalex.org/W1522531528", "https://openalex.org/W1537659559", "https://openalex.org/W1540685400", "https://openalex.org/W1542596583", "https://openalex.org/W1544444076", "https://openalex.org/W1554015367", "https://openalex.org/W1591675293", "https://openalex.org/W1594216983", "https://openalex.org/W1596805757", "https://openalex.org/W1597173708", "https://openalex.org/W1600813180", "https://openalex.org/W1602844557", "https://openalex.org/W1747856733", "https://openalex.org/W1777239053", "https://openalex.org/W1819208595", "https://openalex.org/W1821149804", "https://openalex.org/W1892385946", "https://openalex.org/W1949804828", "https://openalex.org/W1964535365", "https://openalex.org/W1965324089", "https://openalex.org/W1999874108", "https://openalex.org/W2012302913", "https://openalex.org/W2037539475", "https://openalex.org/W2044828368", "https://openalex.org/W2049633694", "https://openalex.org/W2054807159", "https://openalex.org/W2055207897", "https://openalex.org/W2056099894", "https://openalex.org/W2061562262", "https://openalex.org/W2064527819", "https://openalex.org/W2074231493", "https://openalex.org/W2096772472", "https://openalex.org/W2098432798", "https://openalex.org/W2099111195", "https://openalex.org/W2102222436", "https://openalex.org/W2103608534", "https://openalex.org/W2105934661", "https://openalex.org/W2105947986", "https://openalex.org/W2107726111", "https://openalex.org/W2111999018", "https://openalex.org/W2113023245", "https://openalex.org/W2116108870", "https://openalex.org/W2116442740", "https://openalex.org/W2117102490", "https://openalex.org/W2120591602", "https://openalex.org/W2121517924", "https://openalex.org/W2121863487", "https://openalex.org/W2125838338", "https://openalex.org/W2126105931", "https://openalex.org/W2128160875", "https://openalex.org/W2130105540", "https://openalex.org/W2139053308", "https://openalex.org/W2139302369", "https://openalex.org/W2142828048", "https://openalex.org/W2144587497", "https://openalex.org/W2148603752", "https://openalex.org/W2155357014", "https://openalex.org/W2158598687", "https://openalex.org/W2159080219", "https://openalex.org/W2161521419", "https://openalex.org/W2163614729", "https://openalex.org/W2167224731", "https://openalex.org/W2168359464", "https://openalex.org/W2169071224", "https://openalex.org/W2169080882", "https://openalex.org/W2169498096", "https://openalex.org/W2184803328", "https://openalex.org/W2489939061", "https://openalex.org/W2567948266", "https://openalex.org/W3195133498"], "related_works": ["https://openalex.org/W2121863487", "https://openalex.org/W2061562262", "https://openalex.org/W1986014385", "https://openalex.org/W1999874108", "https://openalex.org/W2169209873", "https://openalex.org/W1515851193", "https://openalex.org/W2087269417", "https://openalex.org/W2162991084", "https://openalex.org/W2145339207", "https://openalex.org/W1503697431", "https://openalex.org/W1756061918", "https://openalex.org/W2150930292", "https://openalex.org/W2119567691", "https://openalex.org/W1969160376", "https://openalex.org/W1777239053", "https://openalex.org/W2114882146", "https://openalex.org/W2890803796", "https://openalex.org/W3134537774", "https://openalex.org/W2964227158", "https://openalex.org/W1591675293"], "abstract_inverted_index": {"Many": [0], "problems": [1, 34, 119], "in": [2, 32, 44, 89, 124, 161, 209], "robotics": [3], "have": [4, 179, 192, 204], "unknown,": [5], "stochastic,": [6, 165], "high-dimensional,": [7], "and": [8, 12, 21, 80, 160, 167, 239], "highly": [9], "nonlinear": [10], "dynamics,": [11], "offer": [13], "significant": [14], "challenges": [15, 83], "to": [16, 41, 69, 85, 98, 142, 150, 183, 197, 229, 255], "both": [17, 77], "traditional": [18], "control": [19, 52, 145, 189], "methods": [20], "reinforcement": [22, 90], "learning": [23, 109, 127], "algorithms.": [24], "Some": [25], "of": [26, 50, 76, 133, 257], "the": [27, 51, 58, 86, 125, 134, 151, 157, 162, 171, 207, 219, 258], "key": [28], "difficulties": [29], "that": [30, 117, 256], "arise": [31], "these": [33, 118], "are:": [35], "(i)": [36], "It": [37, 65, 93], "is": [38, 57, 66, 94, 253], "often": [39, 67, 95], "difficult": [40, 68], "write": [42], "down,": [43], "closed": [45], "form,": [46], "a": [47, 71, 144, 194], "formal": [48, 112], "specification": [49], "task.": [53], "For": [54], "example,": [55], "what": [56], "objective": [59], "function": [60], "for": [61, 102], "\"flying": [62], "well\"?": [63], "(ii)": [64], "build": [70], "good": [72], "dynamics": [73], "model": [74], "because": [75], "data": [78, 81], "collection": [79], "modeling": [82], "(similar": [84], "\"exploration": [87], "problem\"": [88], "learning).": [91], "(iii)": [92], "computationally": [96], "expensive": [97], "find": [99], "closed-loop": [100], "controllers": [101], "high": [103], "dimensional,": [104], "stochastic": [105], "domains.": [106], "We": [107, 153], "describe": [108], "algorithms": [110, 139, 178], "with": [111, 147], "performance": [113, 148, 155, 252], "guarantees": [114], "which": [115, 241], "show": [116], "can": [120, 247], "be": [121], "efficiently": [122], "addressed": [123], "apprenticeship": [126], "setting\u2014the": [128], "setting": [129], "when": [130], "expert": [131, 244], "demonstrations": [132], "task": [135, 159], "are": [136, 140], "available.": [137], "Our": [138, 213, 249], "guaranteed": [141], "return": [143], "policy": [146], "comparable": [149, 254], "expert's.": [152], "evaluate": [154], "on": [156], "same": [158, 163], "(typically": [164], "high-dimensional": [166], "non-linear)": [168], "environment": [169], "as": [170, 234], "expert.": [172], "Besides": [173], "having": [174], "theoretical": [175], "guarantees,": [176], "our": [177], "also": [180], "enabled": [181, 193], "us": [182], "solve": [184], "some": [185], "previously": [186, 200], "unsolved": [187], "real-world": [188], "problems:": [190], "They": [191, 203], "quadruped": [195], "robot": [196], "traverse": [198], "challenging,": [199], "unseen": [201], "terrain.": [202], "significantly": [205], "extended": [206], "state-of-the-art": [208], "autonomous": [210, 227], "helicopter": [211, 214, 228], "flight.": [212], "has": [215], "performed": [216, 224], "by": [217, 225], "far": [218], "most": [220], "challenging": [221], "aerobatic": [222, 250], "maneuvers": [223, 232], "any": [226], "date,": [230], "including": [231], "such": [233], "continuous": [235], "in-place": [236], "flips,": [237], "rolls": [238], "tic-tocs,": [240], "only": [242], "exceptional": [243], "human": [245, 260], "pilots": [246], "fly.": [248], "flight": [251], "best": [259], "pilots.": [261]}, "counts_by_year": [{"year": 2024, "cited_by_count": 1}, {"year": 2023, "cited_by_count": 1}, {"year": 2022, "cited_by_count": 1}, {"year": 2021, "cited_by_count": 3}, {"year": 2020, "cited_by_count": 7}, {"year": 2019, "cited_by_count": 8}, {"year": 2018, "cited_by_count": 5}, {"year": 2017, "cited_by_count": 3}, {"year": 2016, "cited_by_count": 7}, {"year": 2015, "cited_by_count": 6}, {"year": 2014, "cited_by_count": 3}, {"year": 2013, "cited_by_count": 4}, {"year": 2012, "cited_by_count": 8}], "updated_date": "2025-11-06T04:12:42.849631", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2290354866", "doi": "https://doi.org/10.48550/arxiv.1603.00748", "title": "Continuous Deep Q-Learning with Model-based Acceleration", "display_name": "Continuous Deep Q-Learning with Model-based Acceleration", "relevance_score": 237.40227, "publication_year": 2016, "publication_date": "2016-03-02", "ids": {"openalex": "https://openalex.org/W2290354866", "doi": "https://doi.org/10.48550/arxiv.1603.00748", "mag": "2290354866"}, "language": "en", "primary_location": {"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "type": "article", "indexed_in": ["arxiv", "datacite"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1603.00748", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5061613634", "display_name": "Shixiang Gu", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210135521", "display_name": "Max Planck Institute for Intelligent Systems", "ror": "https://ror.org/04fq9j139", "country_code": "DE", "type": "facility", "lineage": ["https://openalex.org/I149899117", "https://openalex.org/I4210135521"]}], "countries": ["DE", "US"], "is_corresponding": true, "raw_author_name": "Gu, Shixiang", "raw_affiliation_strings": ["University of Cambridge and Max Planck Institute for Intelligent Systems and Google Brain#TAB#"], "affiliations": [{"raw_affiliation_string": "University of Cambridge and Max Planck Institute for Intelligent Systems and Google Brain#TAB#", "institution_ids": ["https://openalex.org/I4210135521", "https://openalex.org/I1291425158"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5066294254", "display_name": "Timothy Lillicrap", "orcid": "https://orcid.org/0000-0001-8918-486X"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}, {"id": "https://openalex.org/I4210090411", "display_name": "DeepMind (United Kingdom)", "ror": "https://ror.org/00971b260", "country_code": "GB", "type": "company", "lineage": ["https://openalex.org/I4210090411", "https://openalex.org/I4210128969"]}], "countries": ["GB", "US"], "is_corresponding": false, "raw_author_name": "Lillicrap, Timothy", "raw_affiliation_strings": ["Google DeepMind;"], "affiliations": [{"raw_affiliation_string": "Google DeepMind;", "institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210090411"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5006446297", "display_name": "Ilya Sutskever", "orcid": null}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Sutskever, Ilya", "raw_affiliation_strings": ["[Google Brain]"], "affiliations": [{"raw_affiliation_string": "[Google Brain]", "institution_ids": ["https://openalex.org/I1291425158"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5026322200", "display_name": "Sergey Levine", "orcid": "https://orcid.org/0000-0001-6764-2743"}, "institutions": [{"id": "https://openalex.org/I1291425158", "display_name": "Google (United States)", "ror": "https://ror.org/00njsd438", "country_code": "US", "type": "company", "lineage": ["https://openalex.org/I1291425158", "https://openalex.org/I4210128969"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Levine, Sergey", "raw_affiliation_strings": ["[Google Brain]"], "affiliations": [{"raw_affiliation_string": "[Google Brain]", "institution_ids": ["https://openalex.org/I1291425158"]}]}], "institutions": [], "countries_distinct_count": 3, "institutions_distinct_count": 4, "corresponding_author_ids": ["https://openalex.org/A5061613634"], "corresponding_institution_ids": ["https://openalex.org/I1291425158", "https://openalex.org/I4210135521"], "apc_list": null, "apc_paid": null, "fwci": null, "has_fulltext": true, "cited_by_count": 337, "citation_normalized_percentile": null, "cited_by_percentile_year": null, "biblio": {"volume": null, "issue": null, "first_page": "2829", "last_page": "2838"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12794", "display_name": "Adaptive Dynamic Programming Control", "score": 0.9876000285148621, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11689", "display_name": "Adversarial Robustness in Machine Learning", "score": 0.9819999933242798, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.9228410124778748}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7602490186691284}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5720012187957764}, {"id": "https://openalex.org/keywords/limit", "display_name": "Limit (mathematics)", "score": 0.5509039759635925}, {"id": "https://openalex.org/keywords/artificial-neural-network", "display_name": "Artificial neural network", "score": 0.536858081817627}, {"id": "https://openalex.org/keywords/acceleration", "display_name": "Acceleration", "score": 0.5264359712600708}, {"id": "https://openalex.org/keywords/range", "display_name": "Range (aeronautics)", "score": 0.5211313366889954}, {"id": "https://openalex.org/keywords/temporal-difference-learning", "display_name": "Temporal difference learning", "score": 0.513974130153656}, {"id": "https://openalex.org/keywords/set", "display_name": "Set (abstract data type)", "score": 0.493217408657074}, {"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.47142165899276733}, {"id": "https://openalex.org/keywords/function", "display_name": "Function (biology)", "score": 0.46152347326278687}, {"id": "https://openalex.org/keywords/q-learning", "display_name": "Q-learning", "score": 0.4430024027824402}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.4366663694381714}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4084426760673523}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.12573018670082092}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.9228410124778748}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7602490186691284}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5720012187957764}, {"id": "https://openalex.org/C151201525", "wikidata": "https://www.wikidata.org/wiki/Q177239", "display_name": "Limit (mathematics)", "level": 2, "score": 0.5509039759635925}, {"id": "https://openalex.org/C50644808", "wikidata": "https://www.wikidata.org/wiki/Q192776", "display_name": "Artificial neural network", "level": 2, "score": 0.536858081817627}, {"id": "https://openalex.org/C117896860", "wikidata": "https://www.wikidata.org/wiki/Q11376", "display_name": "Acceleration", "level": 2, "score": 0.5264359712600708}, {"id": "https://openalex.org/C204323151", "wikidata": "https://www.wikidata.org/wiki/Q905424", "display_name": "Range (aeronautics)", "level": 2, "score": 0.5211313366889954}, {"id": "https://openalex.org/C196340769", "wikidata": "https://www.wikidata.org/wiki/Q7698910", "display_name": "Temporal difference learning", "level": 3, "score": 0.513974130153656}, {"id": "https://openalex.org/C177264268", "wikidata": "https://www.wikidata.org/wiki/Q1514741", "display_name": "Set (abstract data type)", "level": 2, "score": 0.493217408657074}, {"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.47142165899276733}, {"id": "https://openalex.org/C14036430", "wikidata": "https://www.wikidata.org/wiki/Q3736076", "display_name": "Function (biology)", "level": 2, "score": 0.46152347326278687}, {"id": "https://openalex.org/C188116033", "wikidata": "https://www.wikidata.org/wiki/Q2664563", "display_name": "Q-learning", "level": 3, "score": 0.4430024027824402}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.4366663694381714}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4084426760673523}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.12573018670082092}, {"id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C74650414", "wikidata": "https://www.wikidata.org/wiki/Q11397", "display_name": "Classical mechanics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C78458016", "wikidata": "https://www.wikidata.org/wiki/Q840400", "display_name": "Evolutionary biology", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C159985019", "wikidata": "https://www.wikidata.org/wiki/Q181790", "display_name": "Composite material", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199360897", "wikidata": "https://www.wikidata.org/wiki/Q9143", "display_name": "Programming language", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C192562407", "wikidata": "https://www.wikidata.org/wiki/Q228736", "display_name": "Materials science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 4, "locations": [{"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, {"id": "pmh:oai:www.repository.cam.ac.uk:1810/255896", "is_oa": true, "landing_page_url": "https://www.repository.cam.ac.uk/handle/1810/255896", "pdf_url": "https://www.repository.cam.ac.uk/handle/1810/255896", "source": {"id": "https://openalex.org/S4306401776", "display_name": "Apollo (University of Cambridge)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I241749", "host_organization_name": "University of Cambridge", "host_organization_lineage": ["https://openalex.org/I241749"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "published version"}, {"id": "doi:10.48550/arxiv.1603.00748", "is_oa": true, "landing_page_url": "https://doi.org/10.48550/arxiv.1603.00748", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": "article"}, {"id": "mag:2290354866", "is_oa": false, "landing_page_url": null, "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1603.00748", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1603.00748", "pdf_url": "https://arxiv.org/pdf/1603.00748", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "text"}, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320317153", "display_name": "DeepMind", "ror": "https://ror.org/00971b260"}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 39, "referenced_works": ["https://openalex.org/W195033972", "https://openalex.org/W1491843047", "https://openalex.org/W1499669280", "https://openalex.org/W1597173708", "https://openalex.org/W1599347336", "https://openalex.org/W1771410628", "https://openalex.org/W1825869920", "https://openalex.org/W2012587148", "https://openalex.org/W2038794597", "https://openalex.org/W2087617385", "https://openalex.org/W2104733512", "https://openalex.org/W2115121720", "https://openalex.org/W2121103318", "https://openalex.org/W2127107099", "https://openalex.org/W2140135625", "https://openalex.org/W2143072483", "https://openalex.org/W2145339207", "https://openalex.org/W2154208977", "https://openalex.org/W2155027007", "https://openalex.org/W2156737235", "https://openalex.org/W2158782408", "https://openalex.org/W2165150801", "https://openalex.org/W2173564293", "https://openalex.org/W2296360731", "https://openalex.org/W2566089760", "https://openalex.org/W2949608212", "https://openalex.org/W2962986780", "https://openalex.org/W2963184621", "https://openalex.org/W2963280855", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2963616477", "https://openalex.org/W2963864421", "https://openalex.org/W2964006217", "https://openalex.org/W2964121744", "https://openalex.org/W2964161785", "https://openalex.org/W2965916140", "https://openalex.org/W3148194443", "https://openalex.org/W3148685027"], "related_works": ["https://openalex.org/W2964161785", "https://openalex.org/W2964121744", "https://openalex.org/W2964043796", "https://openalex.org/W2962872206", "https://openalex.org/W2736601468", "https://openalex.org/W2257979135", "https://openalex.org/W2173248099", "https://openalex.org/W2165150801", "https://openalex.org/W2158782408", "https://openalex.org/W2155968351", "https://openalex.org/W2155027007", "https://openalex.org/W2145339207", "https://openalex.org/W2140135625", "https://openalex.org/W2121863487", "https://openalex.org/W1771410628", "https://openalex.org/W1757796397", "https://openalex.org/W1491843047", "https://openalex.org/W2119717200", "https://openalex.org/W2173564293", "https://openalex.org/W2964006217"], "abstract_inverted_index": {"Model-free": [0], "reinforcement": [1, 63, 154], "learning": [2, 64, 173], "has": [3, 14], "been": [4, 16], "successfully": [5], "applied": [6], "to": [7, 18, 41, 45, 56, 101, 115, 121], "a": [8, 84, 129], "range": [9], "of": [10, 31, 61, 78, 87, 131, 141, 148], "challenging": [11], "problems,": [12], "and": [13, 24, 54, 108, 124, 169], "recently": [15], "extended": [17], "handle": [19], "large": [20], "neural": [21], "network": [22], "policies": [23], "value": [25], "functions.": [26], "However,": [27], "the": [28, 58, 76, 88, 102, 139, 146], "sample": [29, 59], "complexity": [30, 60], "model-free": [32, 153], "algorithms,": [33], "particularly": [34], "when": [35], "using": [36], "high-dimensional": [37], "function": [38], "approximators,": [39], "tends": [40], "limit": [42], "their": [43], "applicability": [44], "physical": [46], "systems.": [47], "In": [48], "this": [49], "paper,": [50], "we": [51, 82, 92, 144], "explore": [52, 145], "algorithms": [53], "representations": [55], "reduce": [57], "deep": [62], "for": [65, 74, 151, 167], "continuous": [66, 85, 122], "control": [67, 134], "tasks.": [68, 135], "We": [69, 156], "propose": [70], "two": [71], "complementary": [72], "techniques": [73], "improving": [75], "efficiency": [77, 140], "such": [79, 177], "algorithms.": [80], "First,": [81], "derive": [83], "variant": [86], "Q-learning": [89, 117], "algorithm,": [90], "which": [91], "call": [93], "normalized": [94], "adantage": [95], "functions": [96], "(NAF),": [97], "as": [98], "an": [99], "alternative": [100], "more": [103], "commonly": [104], "used": [105], "policy": [106], "gradient": [107], "actor-critic": [109], "methods.": [110], "NAF": [111], "representation": [112], "allows": [113], "us": [114], "apply": [116], "with": [118], "experience": [119], "replay": [120], "tasks,": [123], "substantially": [125, 171], "improves": [126], "performance": [127], "on": [128, 174], "set": [130], "simulated": [132], "robotic": [133], "To": [136], "further": [137], "improve": [138], "our": [142], "approach,": [143], "use": [147], "learned": [149], "models": [150, 163, 178], "accelerating": [152], "learning.": [155], "show": [157], "that": [158], "iteratively": [159], "refitted": [160], "local": [161], "linear": [162], "are": [164, 179], "especially": [165], "effective": [166], "this,": [168], "demonstrate": [170], "faster": [172], "domains": [175], "where": [176], "applicable.": [180]}, "counts_by_year": [{"year": 2024, "cited_by_count": 5}, {"year": 2023, "cited_by_count": 4}, {"year": 2022, "cited_by_count": 12}, {"year": 2021, "cited_by_count": 63}, {"year": 2020, "cited_by_count": 83}, {"year": 2019, "cited_by_count": 88}, {"year": 2018, "cited_by_count": 55}, {"year": 2017, "cited_by_count": 22}, {"year": 2016, "cited_by_count": 5}], "updated_date": "2026-04-04T16:13:02.066488", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4205837126", "doi": "https://doi.org/10.3390/app12020937", "title": "Smart Industrial Robot Control Trends, Challenges and Opportunities within Manufacturing", "display_name": "Smart Industrial Robot Control Trends, Challenges and Opportunities within Manufacturing", "relevance_score": 227.61636, "publication_year": 2022, "publication_date": "2022-01-17", "ids": {"openalex": "https://openalex.org/W4205837126", "doi": "https://doi.org/10.3390/app12020937"}, "language": "en", "primary_location": {"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5054621323", "display_name": "J\u0101nis \u0100rents", "orcid": "https://orcid.org/0000-0001-5203-3347"}, "institutions": [{"id": "https://openalex.org/I4210162447", "display_name": "Institute of Electronics and Computer Science", "ror": "https://ror.org/05bsp2531", "country_code": "LV", "type": "facility", "lineage": ["https://openalex.org/I4210162447", "https://openalex.org/I70055295"]}], "countries": ["LV"], "is_corresponding": true, "raw_author_name": "Janis Arents", "raw_affiliation_strings": ["Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia"], "raw_orcid": "https://orcid.org/0000-0001-5203-3347", "affiliations": [{"raw_affiliation_string": "Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia", "institution_ids": ["https://openalex.org/I4210162447"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5072892931", "display_name": "Modris Greit\u0101ns", "orcid": "https://orcid.org/0000-0002-5405-0738"}, "institutions": [{"id": "https://openalex.org/I4210162447", "display_name": "Institute of Electronics and Computer Science", "ror": "https://ror.org/05bsp2531", "country_code": "LV", "type": "facility", "lineage": ["https://openalex.org/I4210162447", "https://openalex.org/I70055295"]}], "countries": ["LV"], "is_corresponding": false, "raw_author_name": "Modris Greitans", "raw_affiliation_strings": ["Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia"], "raw_orcid": "https://orcid.org/0000-0002-5405-0738", "affiliations": [{"raw_affiliation_string": "Institute of Electronics and Computer Science, 14 Dzerbenes St., LV-1006 Riga, Latvia", "institution_ids": ["https://openalex.org/I4210162447"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5054621323"], "corresponding_institution_ids": ["https://openalex.org/I4210162447"], "apc_list": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "apc_paid": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "fwci": 31.781, "has_fulltext": false, "cited_by_count": 287, "citation_normalized_percentile": {"value": 0.99942475, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "12", "issue": "2", "first_page": "937", "last_page": "937"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12111", "display_name": "Industrial Vision Systems and Defect Detection", "score": 0.9890999794006348, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10763", "display_name": "Digital Transformation in Industry", "score": 0.9733999967575073, "subfield": {"id": "https://openalex.org/subfields/2209", "display_name": "Industrial and Manufacturing Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5922507047653198}, {"id": "https://openalex.org/keywords/flexibility", "display_name": "Flexibility (engineering)", "score": 0.5631266832351685}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.46441027522087097}, {"id": "https://openalex.org/keywords/field", "display_name": "Field (mathematics)", "score": 0.46021947264671326}, {"id": "https://openalex.org/keywords/smart-manufacturing", "display_name": "Smart manufacturing", "score": 0.4564518928527832}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.42232292890548706}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4212660491466522}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.41628211736679077}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.36408108472824097}, {"id": "https://openalex.org/keywords/manufacturing-engineering", "display_name": "Manufacturing engineering", "score": 0.31831610202789307}, {"id": "https://openalex.org/keywords/management", "display_name": "Management", "score": 0.07213842868804932}], "concepts": [{"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5922507047653198}, {"id": "https://openalex.org/C2780598303", "wikidata": "https://www.wikidata.org/wiki/Q65921492", "display_name": "Flexibility (engineering)", "level": 2, "score": 0.5631266832351685}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.46441027522087097}, {"id": "https://openalex.org/C9652623", "wikidata": "https://www.wikidata.org/wiki/Q190109", "display_name": "Field (mathematics)", "level": 2, "score": 0.46021947264671326}, {"id": "https://openalex.org/C2988642114", "wikidata": "https://www.wikidata.org/wiki/Q25112020", "display_name": "Smart manufacturing", "level": 2, "score": 0.4564518928527832}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.42232292890548706}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4212660491466522}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.41628211736679077}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.36408108472824097}, {"id": "https://openalex.org/C117671659", "wikidata": "https://www.wikidata.org/wiki/Q11049265", "display_name": "Manufacturing engineering", "level": 1, "score": 0.31831610202789307}, {"id": "https://openalex.org/C187736073", "wikidata": "https://www.wikidata.org/wiki/Q2920921", "display_name": "Management", "level": 1, "score": 0.07213842868804932}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, {"id": "pmh:oai:doaj.org/article:51d3cb9089504cc1b4d66cdfdaa331be", "is_oa": true, "landing_page_url": "https://doaj.org/article/51d3cb9089504cc1b4d66cdfdaa331be", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences, Vol 12, Iss 2, p 937 (2022)", "raw_type": "article"}, {"id": "pmh:oai:mdpi.com:/2076-3417/12/2/937/", "is_oa": true, "landing_page_url": "https://dx.doi.org/10.3390/app12020937", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400947", "display_name": "MDPI (MDPI AG)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I4210097602", "host_organization_name": "Multidisciplinary Digital Publishing Institute (Switzerland)", "host_organization_lineage": ["https://openalex.org/I4210097602"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences; Volume 12; Issue 2; Pages: 937", "raw_type": "Text"}], "best_oa_location": {"id": "doi:10.3390/app12020937", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app12020937", "pdf_url": "https://www.mdpi.com/2076-3417/12/2/937/pdf?version=1642436013", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/9", "score": 0.6000000238418579, "display_name": "Industry, innovation and infrastructure"}], "awards": [], "funders": [], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W4205837126.pdf", "grobid_xml": "https://content.openalex.org/works/W4205837126.grobid-xml"}, "referenced_works_count": 110, "referenced_works": ["https://openalex.org/W639708223", "https://openalex.org/W1258130145", "https://openalex.org/W1481659984", "https://openalex.org/W1606048812", "https://openalex.org/W1977655452", "https://openalex.org/W1988785119", "https://openalex.org/W1991544872", "https://openalex.org/W1999156278", "https://openalex.org/W2012392077", "https://openalex.org/W2044165158", "https://openalex.org/W2074220104", "https://openalex.org/W2089806275", "https://openalex.org/W2132862423", "https://openalex.org/W2174644224", "https://openalex.org/W2205414745", "https://openalex.org/W2257979135", "https://openalex.org/W2570343428", "https://openalex.org/W2604236302", "https://openalex.org/W2604726708", "https://openalex.org/W2604736759", "https://openalex.org/W2615190535", "https://openalex.org/W2766196346", "https://openalex.org/W2767346351", "https://openalex.org/W2769112066", "https://openalex.org/W2771075179", "https://openalex.org/W2771896380", "https://openalex.org/W2793432429", "https://openalex.org/W2794908222", "https://openalex.org/W2891372393", "https://openalex.org/W2901362978", "https://openalex.org/W2902780211", "https://openalex.org/W2907108913", "https://openalex.org/W2910474428", "https://openalex.org/W2910788814", "https://openalex.org/W2912196091", "https://openalex.org/W2913358857", "https://openalex.org/W2919358988", "https://openalex.org/W2958510209", "https://openalex.org/W2962736495", "https://openalex.org/W2962759351", "https://openalex.org/W2962793652", "https://openalex.org/W2963033241", "https://openalex.org/W2963188159", "https://openalex.org/W2963669336", "https://openalex.org/W2964239605", "https://openalex.org/W2964249569", "https://openalex.org/W2964333597", "https://openalex.org/W2967727187", "https://openalex.org/W2968268581", "https://openalex.org/W2969113429", "https://openalex.org/W2970520611", "https://openalex.org/W2976205474", "https://openalex.org/W2979417040", "https://openalex.org/W2980888398", "https://openalex.org/W2981697369", "https://openalex.org/W2984673978", "https://openalex.org/W2984754495", "https://openalex.org/W2993476843", "https://openalex.org/W2995284451", "https://openalex.org/W3003620461", "https://openalex.org/W3004047800", "https://openalex.org/W3008535267", "https://openalex.org/W3010515602", "https://openalex.org/W3010834209", "https://openalex.org/W3016690394", "https://openalex.org/W3016969588", "https://openalex.org/W3023241112", "https://openalex.org/W3028308378", "https://openalex.org/W3028964554", "https://openalex.org/W3033312324", "https://openalex.org/W3044115425", "https://openalex.org/W3046735138", "https://openalex.org/W3046748421", "https://openalex.org/W3082854280", "https://openalex.org/W3088013802", "https://openalex.org/W3088158297", "https://openalex.org/W3089580269", "https://openalex.org/W3092037789", "https://openalex.org/W3096973715", "https://openalex.org/W3099982022", "https://openalex.org/W3100172161", "https://openalex.org/W3100789280", "https://openalex.org/W3101103779", "https://openalex.org/W3112160583", "https://openalex.org/W3122928565", "https://openalex.org/W3125366729", "https://openalex.org/W3127352841", "https://openalex.org/W3128707967", "https://openalex.org/W3131225419", "https://openalex.org/W3131554325", "https://openalex.org/W3131966994", "https://openalex.org/W3133743901", "https://openalex.org/W3133750590", "https://openalex.org/W3136021864", "https://openalex.org/W3161970973", "https://openalex.org/W3167898434", "https://openalex.org/W3181223963", "https://openalex.org/W3194459689", "https://openalex.org/W4242227498", "https://openalex.org/W4252782962", "https://openalex.org/W4312562390", "https://openalex.org/W4313031817", "https://openalex.org/W6620707391", "https://openalex.org/W6622324178", "https://openalex.org/W6668794633", "https://openalex.org/W6721962699", "https://openalex.org/W6764969207", "https://openalex.org/W6771689172", "https://openalex.org/W6777660902", "https://openalex.org/W6795768249"], "related_works": ["https://openalex.org/W1508899372", "https://openalex.org/W4236696095", "https://openalex.org/W3143779693", "https://openalex.org/W2012658348", "https://openalex.org/W3013410248", "https://openalex.org/W2910904538", "https://openalex.org/W3092604565", "https://openalex.org/W4214836412", "https://openalex.org/W2770239401", "https://openalex.org/W2907002303"], "abstract_inverted_index": {"Industrial": [0], "robots": [1, 32], "and": [2, 28, 50, 86, 106, 115, 122, 132], "associated": [3], "control": [4, 24, 75, 120], "methods": [5, 107], "are": [6, 38, 57, 100, 108, 127, 135], "continuously": [7], "developing.": [8], "With": [9], "the": [10, 14, 43, 59, 69, 83, 88, 96, 138], "recent": [11], "progress": [12], "in": [13, 21, 76, 125], "field": [15], "of": [16, 42, 46, 53, 71, 91], "artificial": [17], "intelligence,": [18], "new": [19], "perspectives": [20], "industrial": [22, 73, 93], "robot": [23, 74, 119], "strategies": [25, 105], "have": [26, 33], "emerged,": [27], "prospects": [29], "towards": [30, 78], "cognitive": [31], "arisen.": [34], "AI-based": [35], "robotic": [36], "systems": [37], "strongly": [39], "becoming": [40, 58], "one": [41], "main": [44], "areas": [45], "focus,": [47], "as": [48], "flexibility": [49], "deep": [51, 112], "understanding": [52], "complex": [54], "manufacturing": [55, 77, 126], "processes": [56], "key": [60], "advantage": [61], "to": [62], "raise": [63], "competitiveness.": [64], "This": [65], "review": [66], "first": [67], "expresses": [68], "significance": [70], "smart": [72, 92], "future": [79], "factories": [80], "by": [81], "listing": [82], "needs,": [84], "requirements": [85], "introducing": [87], "envisioned": [89], "concept": [90], "robots.": [94], "Secondly,": [95], "current": [97], "trends": [98], "that": [99], "based": [101, 118], "on": [102], "different": [103], "learning": [104, 114, 117], "explored.": [109], "Current": [110], "computer-vision,": [111], "reinforcement": [113], "imitation": [116], "approaches": [121], "possible": [123], "applications": [124], "investigated.": [128], "Gaps,": [129], "challenges,": [130], "limitations": [131], "open": [133], "issues": [134], "identified": [136], "along": [137], "way.": [139]}, "counts_by_year": [{"year": 2026, "cited_by_count": 25}, {"year": 2025, "cited_by_count": 100}, {"year": 2024, "cited_by_count": 83}, {"year": 2023, "cited_by_count": 51}, {"year": 2022, "cited_by_count": 28}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2022-01-26T00:00:00"}, {"id": "https://openalex.org/W3048090341", "doi": "https://doi.org/10.1007/s10846-020-01237-6", "title": "A Fuzzy Reinforcement Learning Approach for Continuum Robot Control", "display_name": "A Fuzzy Reinforcement Learning Approach for Continuum Robot Control", "relevance_score": 219.91722, "publication_year": 2020, "publication_date": "2020-08-06", "ids": {"openalex": "https://openalex.org/W3048090341", "doi": "https://doi.org/10.1007/s10846-020-01237-6", "mag": "3048090341"}, "language": "en", "primary_location": {"id": "doi:10.1007/s10846-020-01237-6", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s10846-020-01237-6", "pdf_url": null, "source": {"id": "https://openalex.org/S91329792", "display_name": "Journal of Intelligent & Robotic Systems", "issn_l": "0921-0296", "issn": ["0921-0296", "1573-0409"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Intelligent &amp; Robotic Systems", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5082196811", "display_name": "Masoud Goharimanesh", "orcid": "https://orcid.org/0000-0001-7527-1014"}, "institutions": [{"id": "https://openalex.org/I4210133510", "display_name": "University of Torbat Heydarieh", "ror": "https://ror.org/03ncps145", "country_code": "IR", "type": "education", "lineage": ["https://openalex.org/I4210133510"]}], "countries": ["IR"], "is_corresponding": true, "raw_author_name": "M. Goharimanesh", "raw_affiliation_strings": ["Department of Mechanical Engineering, University of Torbat Heydarieh, Torbat Heydarieh, Iran"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Mechanical Engineering, University of Torbat Heydarieh, Torbat Heydarieh, Iran", "institution_ids": ["https://openalex.org/I4210133510"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5059755195", "display_name": "Ali Mehrkish", "orcid": "https://orcid.org/0000-0003-4096-2856"}, "institutions": [{"id": "https://openalex.org/I530967", "display_name": "Toronto Metropolitan University", "ror": "https://ror.org/05g13zd79", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I530967"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "A. Mehrkish", "raw_affiliation_strings": ["Department of Mechanical and Industrial Engineering, Ryerson University, 350 Victoria Street, Toronto, M5B 2K3, Canada"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Department of Mechanical and Industrial Engineering, Ryerson University, 350 Victoria Street, Toronto, M5B 2K3, Canada", "institution_ids": ["https://openalex.org/I530967"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5025228155", "display_name": "Farrokh Janabi\u2010Sharifi", "orcid": "https://orcid.org/0000-0002-0314-0688"}, "institutions": [{"id": "https://openalex.org/I530967", "display_name": "Toronto Metropolitan University", "ror": "https://ror.org/05g13zd79", "country_code": "CA", "type": "education", "lineage": ["https://openalex.org/I530967"]}], "countries": ["CA"], "is_corresponding": false, "raw_author_name": "F. Janabi-Sharifi", "raw_affiliation_strings": ["Department of Mechanical and Industrial Engineering, Ryerson University, 350 Victoria Street, Toronto, M5B 2K3, Canada"], "raw_orcid": "https://orcid.org/0000-0002-0314-0688", "affiliations": [{"raw_affiliation_string": "Department of Mechanical and Industrial Engineering, Ryerson University, 350 Victoria Street, Toronto, M5B 2K3, Canada", "institution_ids": ["https://openalex.org/I530967"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 3, "corresponding_author_ids": ["https://openalex.org/A5082196811"], "corresponding_institution_ids": ["https://openalex.org/I4210133510"], "apc_list": {"value": 2390, "currency": "EUR", "value_usd": 2990}, "apc_paid": null, "fwci": 2.7872, "has_fulltext": false, "cited_by_count": 56, "citation_normalized_percentile": {"value": 0.90586896, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 100}, "biblio": {"volume": "100", "issue": "3-4", "first_page": "809", "last_page": "826"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.986299991607666, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11023", "display_name": "Prosthetics and Rehabilitation Robotics", "score": 0.9832000136375427, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.656434178352356}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5417100191116333}, {"id": "https://openalex.org/keywords/flexibility", "display_name": "Flexibility (engineering)", "score": 0.5361276865005493}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5097693800926208}, {"id": "https://openalex.org/keywords/control-theory", "display_name": "Control theory (sociology)", "score": 0.5060991644859314}, {"id": "https://openalex.org/keywords/fuzzy-logic", "display_name": "Fuzzy logic", "score": 0.5011134147644043}, {"id": "https://openalex.org/keywords/trajectory", "display_name": "Trajectory", "score": 0.46056658029556274}, {"id": "https://openalex.org/keywords/convergence", "display_name": "Convergence (economics)", "score": 0.4595031440258026}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.3474181890487671}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.29624611139297485}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.2787751853466034}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.23013874888420105}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.18876561522483826}, {"id": "https://openalex.org/keywords/physics", "display_name": "Physics", "score": 0.10961547493934631}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.656434178352356}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5417100191116333}, {"id": "https://openalex.org/C2780598303", "wikidata": "https://www.wikidata.org/wiki/Q65921492", "display_name": "Flexibility (engineering)", "level": 2, "score": 0.5361276865005493}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5097693800926208}, {"id": "https://openalex.org/C47446073", "wikidata": "https://www.wikidata.org/wiki/Q5165890", "display_name": "Control theory (sociology)", "level": 3, "score": 0.5060991644859314}, {"id": "https://openalex.org/C58166", "wikidata": "https://www.wikidata.org/wiki/Q224821", "display_name": "Fuzzy logic", "level": 2, "score": 0.5011134147644043}, {"id": "https://openalex.org/C13662910", "wikidata": "https://www.wikidata.org/wiki/Q193139", "display_name": "Trajectory", "level": 2, "score": 0.46056658029556274}, {"id": "https://openalex.org/C2777303404", "wikidata": "https://www.wikidata.org/wiki/Q759757", "display_name": "Convergence (economics)", "level": 2, "score": 0.4595031440258026}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.3474181890487671}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.29624611139297485}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.2787751853466034}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.23013874888420105}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.18876561522483826}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.10961547493934631}, {"id": "https://openalex.org/C1276947", "wikidata": "https://www.wikidata.org/wiki/Q333", "display_name": "Astronomy", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C105795698", "wikidata": "https://www.wikidata.org/wiki/Q12483", "display_name": "Statistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C50522688", "wikidata": "https://www.wikidata.org/wiki/Q189833", "display_name": "Economic growth", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/s10846-020-01237-6", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/s10846-020-01237-6", "pdf_url": null, "source": {"id": "https://openalex.org/S91329792", "display_name": "Journal of Intelligent & Robotic Systems", "issn_l": "0921-0296", "issn": ["0921-0296", "1573-0409"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Intelligent &amp; Robotic Systems", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G5337209168", "display_name": null, "funder_award_id": "2019/06", "funder_id": "https://openalex.org/F4320310153", "funder_display_name": "Ryerson University"}, {"id": "https://openalex.org/G969993561", "display_name": null, "funder_award_id": "2017-06930", "funder_id": "https://openalex.org/F4320334593", "funder_display_name": "Natural Sciences and Engineering Research Council of Canada"}], "funders": [{"id": "https://openalex.org/F4320310153", "display_name": "Ryerson University", "ror": "https://ror.org/05g13zd79"}, {"id": "https://openalex.org/F4320334593", "display_name": "Natural Sciences and Engineering Research Council of Canada", "ror": "https://ror.org/01h531d29"}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 61, "referenced_works": ["https://openalex.org/W38129364", "https://openalex.org/W1524590318", "https://openalex.org/W1551593752", "https://openalex.org/W1557517019", "https://openalex.org/W1962883659", "https://openalex.org/W1976307222", "https://openalex.org/W1979286982", "https://openalex.org/W1988003703", "https://openalex.org/W1990005421", "https://openalex.org/W1995071895", "https://openalex.org/W1997060717", "https://openalex.org/W2006573361", "https://openalex.org/W2030827420", "https://openalex.org/W2037867075", "https://openalex.org/W2042402350", "https://openalex.org/W2054875385", "https://openalex.org/W2057889805", "https://openalex.org/W2085238338", "https://openalex.org/W2097854201", "https://openalex.org/W2099618002", "https://openalex.org/W2119343887", "https://openalex.org/W2120846115", "https://openalex.org/W2122275926", "https://openalex.org/W2124175081", "https://openalex.org/W2139416664", "https://openalex.org/W2150205249", "https://openalex.org/W2202412457", "https://openalex.org/W2213299816", "https://openalex.org/W2221830006", "https://openalex.org/W2290611374", "https://openalex.org/W2318613964", "https://openalex.org/W2319022682", "https://openalex.org/W2335997780", "https://openalex.org/W2509818088", "https://openalex.org/W2524477492", "https://openalex.org/W2530266826", "https://openalex.org/W2567356819", "https://openalex.org/W2577724726", "https://openalex.org/W2588445306", "https://openalex.org/W2605447266", "https://openalex.org/W2763529930", "https://openalex.org/W2770049904", "https://openalex.org/W2775361682", "https://openalex.org/W2785688106", "https://openalex.org/W2790147543", "https://openalex.org/W2885248555", "https://openalex.org/W2888546857", "https://openalex.org/W2890623127", "https://openalex.org/W2901112449", "https://openalex.org/W2904237456", "https://openalex.org/W2910702499", "https://openalex.org/W2911226473", "https://openalex.org/W2912944518", "https://openalex.org/W2934649944", "https://openalex.org/W2967429195", "https://openalex.org/W2982962963", "https://openalex.org/W3149874564", "https://openalex.org/W4214717370", "https://openalex.org/W4231838407", "https://openalex.org/W4231909678", "https://openalex.org/W6603932791"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4323768008", "https://openalex.org/W1941703695", "https://openalex.org/W4380318855", "https://openalex.org/W3084456289", "https://openalex.org/W2024136090", "https://openalex.org/W3131574667", "https://openalex.org/W4391331176"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 1}, {"year": 2025, "cited_by_count": 8}, {"year": 2024, "cited_by_count": 15}, {"year": 2023, "cited_by_count": 9}, {"year": 2022, "cited_by_count": 15}, {"year": 2021, "cited_by_count": 7}, {"year": 2020, "cited_by_count": 1}], "updated_date": "2025-11-06T03:46:38.306776", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2045463255", "doi": "https://doi.org/10.1016/j.robot.2003.11.006", "title": "A reinforcement learning with evolutionary state recruitment strategy for autonomous mobile robots control", "display_name": "A reinforcement learning with evolutionary state recruitment strategy for autonomous mobile robots control", "relevance_score": 210.20781, "publication_year": 2004, "publication_date": "2004-01-02", "ids": {"openalex": "https://openalex.org/W2045463255", "doi": "https://doi.org/10.1016/j.robot.2003.11.006", "mag": "2045463255"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.robot.2003.11.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.robot.2003.11.006", "pdf_url": null, "source": {"id": "https://openalex.org/S133768115", "display_name": "Robotics and Autonomous Systems", "issn_l": "0921-8890", "issn": ["0921-8890", "1872-793X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Robotics and Autonomous Systems", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5105405852", "display_name": "Toshiyuki Kondo", "orcid": "https://orcid.org/0000-0001-8200-9276"}, "institutions": [{"id": "https://openalex.org/I114531698", "display_name": "Tokyo Institute of Technology", "ror": "https://ror.org/0112mx960", "country_code": "JP", "type": "education", "lineage": ["https://openalex.org/I114531698"]}], "countries": ["JP"], "is_corresponding": true, "raw_author_name": "Toshiyuki Kondo", "raw_affiliation_strings": ["Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology, 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan"], "affiliations": [{"raw_affiliation_string": "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology, 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "institution_ids": ["https://openalex.org/I114531698"]}, {"raw_affiliation_string": "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "institution_ids": ["https://openalex.org/I114531698"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5102044901", "display_name": "Koji Ito", "orcid": null}, "institutions": [{"id": "https://openalex.org/I114531698", "display_name": "Tokyo Institute of Technology", "ror": "https://ror.org/0112mx960", "country_code": "JP", "type": "education", "lineage": ["https://openalex.org/I114531698"]}], "countries": ["JP"], "is_corresponding": false, "raw_author_name": "Koji Ito", "raw_affiliation_strings": ["Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology, 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan"], "affiliations": [{"raw_affiliation_string": "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology, 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "institution_ids": ["https://openalex.org/I114531698"]}, {"raw_affiliation_string": "Department of Computational Intelligence and Systems Science, Interdisciplinary Graduate School of Science and Engineering, Tokyo Institute of Technology 4259 Nagatsuta, Midori-ku, Yokohama 226-8502, Japan", "institution_ids": ["https://openalex.org/I114531698"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5105405852"], "corresponding_institution_ids": ["https://openalex.org/I114531698"], "apc_list": {"value": 3020, "currency": "USD", "value_usd": 3020}, "apc_paid": null, "fwci": 6.938, "has_fulltext": false, "cited_by_count": 69, "citation_normalized_percentile": {"value": 0.96643799, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 98}, "biblio": {"volume": "46", "issue": "2", "first_page": "111", "last_page": "124"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9998000264167786, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9987000226974487, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10100", "display_name": "Metaheuristic Optimization Algorithms Research", "score": 0.9959999918937683, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.8903038501739502}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8824948072433472}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.6533159017562866}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5520718693733215}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5047584772109985}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4702771008014679}, {"id": "https://openalex.org/keywords/state", "display_name": "State (computer science)", "score": 0.46796613931655884}, {"id": "https://openalex.org/keywords/evolutionary-robotics", "display_name": "Evolutionary robotics", "score": 0.4219830334186554}, {"id": "https://openalex.org/keywords/human\u2013computer-interaction", "display_name": "Human\u2013computer interaction", "score": 0.39948809146881104}, {"id": "https://openalex.org/keywords/programming-language", "display_name": "Programming language", "score": 0.0511641800403595}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.8903038501739502}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8824948072433472}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.6533159017562866}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5520718693733215}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5047584772109985}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4702771008014679}, {"id": "https://openalex.org/C48103436", "wikidata": "https://www.wikidata.org/wiki/Q599031", "display_name": "State (computer science)", "level": 2, "score": 0.46796613931655884}, {"id": "https://openalex.org/C199505168", "wikidata": "https://www.wikidata.org/wiki/Q3267529", "display_name": "Evolutionary robotics", "level": 3, "score": 0.4219830334186554}, {"id": "https://openalex.org/C107457646", "wikidata": "https://www.wikidata.org/wiki/Q207434", "display_name": "Human\u2013computer interaction", "level": 1, "score": 0.39948809146881104}, {"id": "https://openalex.org/C199360897", "wikidata": "https://www.wikidata.org/wiki/Q9143", "display_name": "Programming language", "level": 1, "score": 0.0511641800403595}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1016/j.robot.2003.11.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.robot.2003.11.006", "pdf_url": null, "source": {"id": "https://openalex.org/S133768115", "display_name": "Robotics and Autonomous Systems", "issn_l": "0921-8890", "issn": ["0921-8890", "1872-793X"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Robotics and Autonomous Systems", "raw_type": "journal-article"}, {"id": "pmh:oai:t2r2.star.titech.ac.jp:00102566", "is_oa": false, "landing_page_url": "http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100496562", "pdf_url": null, "source": {"id": "https://openalex.org/S4377196385", "display_name": "Tokyo Tech Research Repository (Tokyo Institute of Technology)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I114531698", "host_organization_name": "Tokyo Institute of Technology", "host_organization_lineage": ["https://openalex.org/I114531698"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "Journal Article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G2615231580", "display_name": null, "funder_award_id": "14750362", "funder_id": "https://openalex.org/F4320320912", "funder_display_name": "Ministry of Education, Culture, Sports, Science and Technology"}, {"id": "https://openalex.org/G2746495763", "display_name": null, "funder_award_id": "14350227", "funder_id": "https://openalex.org/F4320320912", "funder_display_name": "Ministry of Education, Culture, Sports, Science and Technology"}], "funders": [{"id": "https://openalex.org/F4320320912", "display_name": "Ministry of Education, Culture, Sports, Science and Technology", "ror": "https://ror.org/048rj2z13"}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 17, "referenced_works": ["https://openalex.org/W135130342", "https://openalex.org/W1497256448", "https://openalex.org/W1591501902", "https://openalex.org/W1608391772", "https://openalex.org/W1994030467", "https://openalex.org/W2067547081", "https://openalex.org/W2070619758", "https://openalex.org/W2091565802", "https://openalex.org/W2114645401", "https://openalex.org/W2119072456", "https://openalex.org/W2132097429", "https://openalex.org/W2152166054", "https://openalex.org/W2162813238", "https://openalex.org/W2171277043", "https://openalex.org/W4213332169", "https://openalex.org/W4248967795", "https://openalex.org/W6725331345"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W2122871747", "https://openalex.org/W3114279067", "https://openalex.org/W4381586542", "https://openalex.org/W2049953310", "https://openalex.org/W4243658538", "https://openalex.org/W2545545628", "https://openalex.org/W2492394916", "https://openalex.org/W2470534575", "https://openalex.org/W4282019538"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2023, "cited_by_count": 3}, {"year": 2022, "cited_by_count": 1}, {"year": 2021, "cited_by_count": 1}, {"year": 2020, "cited_by_count": 3}, {"year": 2017, "cited_by_count": 2}, {"year": 2016, "cited_by_count": 2}, {"year": 2015, "cited_by_count": 4}, {"year": 2014, "cited_by_count": 2}, {"year": 2013, "cited_by_count": 4}, {"year": 2012, "cited_by_count": 5}], "updated_date": "2026-04-05T17:49:38.594831", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4317038451", "doi": "https://doi.org/10.1109/tai.2023.3237665", "title": "Robotic Control in Adversarial and Sparse Reward Environments: A Robust Goal-Conditioned Reinforcement Learning Approach", "display_name": "Robotic Control in Adversarial and Sparse Reward Environments: A Robust Goal-Conditioned Reinforcement Learning Approach", "relevance_score": 197.99069, "publication_year": 2023, "publication_date": "2023-01-17", "ids": {"openalex": "https://openalex.org/W4317038451", "doi": "https://doi.org/10.1109/tai.2023.3237665"}, "language": "en", "primary_location": {"id": "doi:10.1109/tai.2023.3237665", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/tai.2023.3237665", "pdf_url": null, "source": {"id": "https://openalex.org/S4210169448", "display_name": "IEEE Transactions on Artificial Intelligence", "issn_l": "2691-4581", "issn": ["2691-4581"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Transactions on Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5061259393", "display_name": "Xiangkun He", "orcid": "https://orcid.org/0000-0001-9818-0879"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": true, "raw_author_name": "Xiangkun He", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"], "raw_orcid": "https://orcid.org/0000-0001-9818-0879", "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5072073374", "display_name": "Chen Lv", "orcid": "https://orcid.org/0000-0001-6897-4512"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": false, "raw_author_name": "Chen Lv", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore"], "raw_orcid": "https://orcid.org/0000-0001-6897-4512", "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5061259393"], "corresponding_institution_ids": ["https://openalex.org/I172675005"], "apc_list": null, "apc_paid": null, "fwci": 4.3884, "has_fulltext": false, "cited_by_count": 26, "citation_normalized_percentile": {"value": 0.95392121, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 97, "max": 99}, "biblio": {"volume": "5", "issue": "1", "first_page": "244", "last_page": "253"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9908000230789185, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9908000230789185, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11689", "display_name": "Adversarial Robustness in Machine Learning", "score": 0.9848999977111816, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11099", "display_name": "Autonomous Vehicle Technology and Safety", "score": 0.9433000087738037, "subfield": {"id": "https://openalex.org/subfields/2203", "display_name": "Automotive Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7528084516525269}, {"id": "https://openalex.org/keywords/adversarial-system", "display_name": "Adversarial system", "score": 0.5929465293884277}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.5676349401473999}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.49889445304870605}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.4937637746334076}, {"id": "https://openalex.org/keywords/reinforcement", "display_name": "Reinforcement", "score": 0.49021464586257935}, {"id": "https://openalex.org/keywords/psychology", "display_name": "Psychology", "score": 0.4168988764286041}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.348644495010376}, {"id": "https://openalex.org/keywords/social-psychology", "display_name": "Social psychology", "score": 0.0943710207939148}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7528084516525269}, {"id": "https://openalex.org/C37736160", "wikidata": "https://www.wikidata.org/wiki/Q1801315", "display_name": "Adversarial system", "level": 2, "score": 0.5929465293884277}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.5676349401473999}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.49889445304870605}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.4937637746334076}, {"id": "https://openalex.org/C67203356", "wikidata": "https://www.wikidata.org/wiki/Q1321905", "display_name": "Reinforcement", "level": 2, "score": 0.49021464586257935}, {"id": "https://openalex.org/C15744967", "wikidata": "https://www.wikidata.org/wiki/Q9418", "display_name": "Psychology", "level": 0, "score": 0.4168988764286041}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.348644495010376}, {"id": "https://openalex.org/C77805123", "wikidata": "https://www.wikidata.org/wiki/Q161272", "display_name": "Social psychology", "level": 1, "score": 0.0943710207939148}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1109/tai.2023.3237665", "is_oa": false, "landing_page_url": "https://doi.org/10.1109/tai.2023.3237665", "pdf_url": null, "source": {"id": "https://openalex.org/S4210169448", "display_name": "IEEE Transactions on Artificial Intelligence", "issn_l": "2691-4581", "issn": ["2691-4581"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319808", "host_organization_name": "Institute of Electrical and Electronics Engineers", "host_organization_lineage": ["https://openalex.org/P4310319808"], "host_organization_lineage_names": ["Institute of Electrical and Electronics Engineers"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "IEEE Transactions on Artificial Intelligence", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [{"id": "https://openalex.org/F4320320696", "display_name": "Agency for Science, Technology and Research", "ror": "https://ror.org/036wvzt09"}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 43, "referenced_works": ["https://openalex.org/W2145339207", "https://openalex.org/W2460299708", "https://openalex.org/W2571136349", "https://openalex.org/W2773691349", "https://openalex.org/W2799194071", "https://openalex.org/W2899474210", "https://openalex.org/W2901112449", "https://openalex.org/W2907537824", "https://openalex.org/W2911087563", "https://openalex.org/W2912971983", "https://openalex.org/W2967727187", "https://openalex.org/W2981446616", "https://openalex.org/W3016434138", "https://openalex.org/W3037694644", "https://openalex.org/W3041296001", "https://openalex.org/W3045059767", "https://openalex.org/W3092395313", "https://openalex.org/W3103532359", "https://openalex.org/W3135301907", "https://openalex.org/W3163878499", "https://openalex.org/W3174559407", "https://openalex.org/W3200885897", "https://openalex.org/W3206436203", "https://openalex.org/W3206620955", "https://openalex.org/W3213496246", "https://openalex.org/W4225773298", "https://openalex.org/W4250589301", "https://openalex.org/W4287113380", "https://openalex.org/W4308261669", "https://openalex.org/W6735677848", "https://openalex.org/W6740801417", "https://openalex.org/W6747027214", "https://openalex.org/W6747473740", "https://openalex.org/W6748523217", "https://openalex.org/W6748839928", "https://openalex.org/W6759312646", "https://openalex.org/W6761833289", "https://openalex.org/W6762640273", "https://openalex.org/W6772562993", "https://openalex.org/W6779795787", "https://openalex.org/W6780186808", "https://openalex.org/W6780559895", "https://openalex.org/W6803597800"], "related_works": ["https://openalex.org/W2502115930", "https://openalex.org/W2482350142", "https://openalex.org/W4246396837", "https://openalex.org/W3126451824", "https://openalex.org/W1561927205", "https://openalex.org/W3191453585", "https://openalex.org/W4297672492", "https://openalex.org/W4310988119", "https://openalex.org/W4285226279", "https://openalex.org/W4288019534"], "abstract_inverted_index": {"With": [0], "deep": [1], "neural": [2], "networks": [3], "based": [4], "function": [5], "approximators,": [6], "reinforcement": [7, 80], "learning": [8, 13, 37, 81], "holds": [9], "the": [10, 136, 141, 159, 162, 169, 199], "promise": [11], "of": [12, 52, 161], "complex": [14], "end-to-end": [15, 84], "robotic": [16, 85, 192], "controllers": [17], "that": [18, 187], "can": [19, 190], "map": [20], "high-dimensional": [21], "sensory": [22], "information": [23], "directly": [24], "to": [25, 47, 55, 70, 101, 125, 153], "control": [26, 86, 193], "policies.": [27], "However,": [28], "a": [29, 49, 57, 76, 94, 115, 127, 131, 146], "common": [30], "challenge,": [31], "especially": [32], "for": [33, 83], "robotics,": [34], "is": [35, 45, 99, 123, 151, 172], "sample-efficient": [36], "from": [38], "sparse": [39, 90, 181, 202], "rewards,": [40], "in": [41, 87], "which": [42], "an": [43], "agent": [44], "required": [46], "find": [48], "long": [50], "sequence": [51], "\u201ccorrect\u201d": [53], "actions": [54], "achieve": [56], "desired": [58], "outcome.": [59], "Unfortunately,": [60], "inevitable": [61], "perturbations": [62, 105, 122], "on": [63, 106, 174, 198], "observations": [64, 107], "may": [65], "make": [66], "this": [67, 73], "task": [68], "trickier": [69], "solve.": [71], "Here,": [72], "paper": [74], "advances": [75], "novel": [77], "robust": [78, 147], "goal-conditioned": [79, 148, 155], "approach": [82], "adversarial": [88, 96, 104, 143, 178, 200], "and": [89, 111, 134, 157, 180, 195, 201], "reward": [91, 182, 203], "environments.": [92], "Specifically,": [93], "mixed": [95, 142], "attack": [97], "scheme": [98, 189], "presented": [100], "generate": [102, 135], "diverse": [103], "by": [108, 140], "combining": [109], "white-box": [110], "black-box": [112], "attacks.": [113, 144], "Meanwhile,": [114], "hindsight": [116], "experience": [117, 129], "replay": [118], "technique": [119], "considering": [120], "observation": [121], "developed": [124], "turn": [126], "failed": [128], "into": [130], "successful": [132], "one": [133], "policy": [137, 164, 196], "trajectories": [138, 165], "perturbed": [139, 163], "Additionally,": [145], "actor-critic": [149], "method": [150, 171], "proposed": [152, 170], "learn": [154], "policies": [156], "keep": [158], "variations": [160], "within": [166], "bounds.": [167], "Finally,": [168], "evaluated": [173], "three": [175], "tasks": [176], "with": [177], "attacks": [179], "settings.": [183], "The": [184], "results": [185], "indicate": [186], "our": [188], "ensure": [191], "performance": [194], "robustness": [197], "tasks.": [204]}, "counts_by_year": [{"year": 2026, "cited_by_count": 2}, {"year": 2025, "cited_by_count": 7}, {"year": 2024, "cited_by_count": 13}, {"year": 2023, "cited_by_count": 4}], "updated_date": "2026-05-05T08:41:31.759640", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2742169147", "doi": "https://doi.org/10.1007/978-3-319-65289-4_17", "title": "Toward Effective Soft Robot Control via Reinforcement Learning", "display_name": "Toward Effective Soft Robot Control via Reinforcement Learning", "relevance_score": 195.56093, "publication_year": 2017, "publication_date": "2017-01-01", "ids": {"openalex": "https://openalex.org/W2742169147", "doi": "https://doi.org/10.1007/978-3-319-65289-4_17", "mag": "2742169147"}, "language": "en", "primary_location": {"id": "doi:10.1007/978-3-319-65289-4_17", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/978-3-319-65289-4_17", "pdf_url": null, "source": {"id": "https://openalex.org/S106296714", "display_name": "Lecture notes in computer science", "issn_l": "0302-9743", "issn": ["0302-9743", "1611-3349"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "book series"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Lecture Notes in Computer Science", "raw_type": "book-chapter"}, "type": "book-chapter", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5018060361", "display_name": "Haochong Zhang", "orcid": null}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Haochong Zhang", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5009602756", "display_name": "Rongyun Cao", "orcid": null}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Rongyun Cao", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5027224308", "display_name": "Shlomo Zilberstein", "orcid": "https://orcid.org/0000-0001-9817-7848"}, "institutions": [{"id": "https://openalex.org/I24603500", "display_name": "University of Massachusetts Amherst", "ror": "https://ror.org/0072zz521", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I24603500"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Shlomo Zilberstein", "raw_affiliation_strings": ["University of Massachusetts Amherst, Amherst, MA, 01003-9264, USA"], "affiliations": [{"raw_affiliation_string": "University of Massachusetts Amherst, Amherst, MA, 01003-9264, USA", "institution_ids": ["https://openalex.org/I24603500"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5029102289", "display_name": "Feng Wu", "orcid": "https://orcid.org/0000-0003-3989-0509"}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Feng Wu", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5084710341", "display_name": "Xiaoping Chen", "orcid": "https://orcid.org/0000-0001-8992-9286"}, "institutions": [{"id": "https://openalex.org/I126520041", "display_name": "University of Science and Technology of China", "ror": "https://ror.org/04c4dkn09", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I126520041", "https://openalex.org/I19820366"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Xiaoping Chen", "raw_affiliation_strings": ["University of Science and Technology of China, Hefei, 230027, Anhui, China"], "affiliations": [{"raw_affiliation_string": "University of Science and Technology of China, Hefei, 230027, Anhui, China", "institution_ids": ["https://openalex.org/I126520041"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 5, "corresponding_author_ids": ["https://openalex.org/A5018060361"], "corresponding_institution_ids": ["https://openalex.org/I126520041"], "apc_list": {"value": 5000, "currency": "EUR", "value_usd": 5392}, "apc_paid": null, "fwci": 8.214, "has_fulltext": false, "cited_by_count": 48, "citation_normalized_percentile": {"value": 0.98320871, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 94, "max": 99}, "biblio": {"volume": null, "issue": null, "first_page": "173", "last_page": "184"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11486", "display_name": "Micro and Nano Robotics", "score": 0.9959999918937683, "subfield": {"id": "https://openalex.org/subfields/3104", "display_name": "Condensed Matter Physics"}, "field": {"id": "https://openalex.org/fields/31", "display_name": "Physics and Astronomy"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12784", "display_name": "Modular Robots and Swarm Intelligence", "score": 0.9865000247955322, "subfield": {"id": "https://openalex.org/subfields/2210", "display_name": "Mechanical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8420349359512329}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.8071468472480774}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7579704523086548}, {"id": "https://openalex.org/keywords/process", "display_name": "Process (computing)", "score": 0.5487931966781616}, {"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.5265448689460754}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5240036249160767}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49410489201545715}, {"id": "https://openalex.org/keywords/key", "display_name": "Key (lock)", "score": 0.48107075691223145}, {"id": "https://openalex.org/keywords/soft-robotics", "display_name": "Soft robotics", "score": 0.47242581844329834}, {"id": "https://openalex.org/keywords/robot-control", "display_name": "Robot control", "score": 0.4267944395542145}, {"id": "https://openalex.org/keywords/control-engineering", "display_name": "Control engineering", "score": 0.4089146554470062}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.2290496826171875}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.11558309197425842}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8420349359512329}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.8071468472480774}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7579704523086548}, {"id": "https://openalex.org/C98045186", "wikidata": "https://www.wikidata.org/wiki/Q205663", "display_name": "Process (computing)", "level": 2, "score": 0.5487931966781616}, {"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.5265448689460754}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5240036249160767}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49410489201545715}, {"id": "https://openalex.org/C26517878", "wikidata": "https://www.wikidata.org/wiki/Q228039", "display_name": "Key (lock)", "level": 2, "score": 0.48107075691223145}, {"id": "https://openalex.org/C2776058767", "wikidata": "https://www.wikidata.org/wiki/Q24327151", "display_name": "Soft robotics", "level": 3, "score": 0.47242581844329834}, {"id": "https://openalex.org/C65401140", "wikidata": "https://www.wikidata.org/wiki/Q7353385", "display_name": "Robot control", "level": 4, "score": 0.4267944395542145}, {"id": "https://openalex.org/C133731056", "wikidata": "https://www.wikidata.org/wiki/Q4917288", "display_name": "Control engineering", "level": 1, "score": 0.4089146554470062}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.2290496826171875}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.11558309197425842}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C38652104", "wikidata": "https://www.wikidata.org/wiki/Q3510521", "display_name": "Computer security", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/978-3-319-65289-4_17", "is_oa": false, "landing_page_url": "https://doi.org/10.1007/978-3-319-65289-4_17", "pdf_url": null, "source": {"id": "https://openalex.org/S106296714", "display_name": "Lecture notes in computer science", "issn_l": "0302-9743", "issn": ["0302-9743", "1611-3349"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "book series"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Lecture Notes in Computer Science", "raw_type": "book-chapter"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 30, "referenced_works": ["https://openalex.org/W199177729", "https://openalex.org/W640042938", "https://openalex.org/W1227556793", "https://openalex.org/W1515749184", "https://openalex.org/W1521258934", "https://openalex.org/W1594979435", "https://openalex.org/W1646707810", "https://openalex.org/W1730458554", "https://openalex.org/W1921016406", "https://openalex.org/W1969107377", "https://openalex.org/W2027454283", "https://openalex.org/W2063507564", "https://openalex.org/W2063797945", "https://openalex.org/W2073677587", "https://openalex.org/W2097770951", "https://openalex.org/W2101667962", "https://openalex.org/W2105271646", "https://openalex.org/W2124540383", "https://openalex.org/W2141905643", "https://openalex.org/W2164102968", "https://openalex.org/W2230336266", "https://openalex.org/W2346769424", "https://openalex.org/W2394644742", "https://openalex.org/W2400243651", "https://openalex.org/W2560730116", "https://openalex.org/W2742015494", "https://openalex.org/W2965916140", "https://openalex.org/W3137141854", "https://openalex.org/W4211089519", "https://openalex.org/W4244367163"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W3084456289", "https://openalex.org/W2024136090", "https://openalex.org/W2171912896", "https://openalex.org/W2542723153", "https://openalex.org/W2021541810", "https://openalex.org/W2133150803"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2025, "cited_by_count": 5}, {"year": 2024, "cited_by_count": 8}, {"year": 2023, "cited_by_count": 4}, {"year": 2022, "cited_by_count": 13}, {"year": 2021, "cited_by_count": 6}, {"year": 2020, "cited_by_count": 5}, {"year": 2019, "cited_by_count": 3}, {"year": 2018, "cited_by_count": 2}, {"year": 2017, "cited_by_count": 2}], "updated_date": "2026-03-25T14:56:36.534964", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3008492644", "doi": "https://doi.org/10.3390/app10051555", "title": "Real\u2013Sim\u2013Real Transfer for Real-World Robot Control Policy Learning with Deep Reinforcement Learning", "display_name": "Real\u2013Sim\u2013Real Transfer for Real-World Robot Control Policy Learning with Deep Reinforcement Learning", "relevance_score": 195.45496, "publication_year": 2020, "publication_date": "2020-02-25", "ids": {"openalex": "https://openalex.org/W3008492644", "doi": "https://doi.org/10.3390/app10051555", "mag": "3008492644"}, "language": "en", "primary_location": {"id": "doi:10.3390/app10051555", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app10051555", "pdf_url": "https://www.mdpi.com/2076-3417/10/5/1555/pdf?version=1583318438", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref", "doaj"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://www.mdpi.com/2076-3417/10/5/1555/pdf?version=1583318438", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5107272479", "display_name": "Naijun Liu", "orcid": "https://orcid.org/0009-0007-6623-0799"}, "institutions": [{"id": "https://openalex.org/I19820366", "display_name": "Chinese Academy of Sciences", "ror": "https://ror.org/034t30j35", "country_code": "CN", "type": "government", "lineage": ["https://openalex.org/I19820366"]}, {"id": "https://openalex.org/I4210094879", "display_name": "Shandong Institute of Automation", "ror": "https://ror.org/00qdtba35", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I4210094879", "https://openalex.org/I4210142748"]}, {"id": "https://openalex.org/I4210165038", "display_name": "University of Chinese Academy of Sciences", "ror": "https://ror.org/05qbk4x57", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I19820366", "https://openalex.org/I4210165038"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Naijun Liu", "raw_affiliation_strings": ["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "University of Chinese Academy of Sciences, Beijing 100190, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210094879", "https://openalex.org/I19820366"]}, {"raw_affiliation_string": "University of Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210165038"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5103021731", "display_name": "Yinghao Cai", "orcid": "https://orcid.org/0000-0003-3024-2943"}, "institutions": [{"id": "https://openalex.org/I19820366", "display_name": "Chinese Academy of Sciences", "ror": "https://ror.org/034t30j35", "country_code": "CN", "type": "government", "lineage": ["https://openalex.org/I19820366"]}, {"id": "https://openalex.org/I4210094879", "display_name": "Shandong Institute of Automation", "ror": "https://ror.org/00qdtba35", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I4210094879", "https://openalex.org/I4210142748"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Yinghao Cai", "raw_affiliation_strings": ["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210094879", "https://openalex.org/I19820366"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5111683010", "display_name": "Tao Lu", "orcid": "https://orcid.org/0000-0003-3374-5845"}, "institutions": [{"id": "https://openalex.org/I19820366", "display_name": "Chinese Academy of Sciences", "ror": "https://ror.org/034t30j35", "country_code": "CN", "type": "government", "lineage": ["https://openalex.org/I19820366"]}, {"id": "https://openalex.org/I4210094879", "display_name": "Shandong Institute of Automation", "ror": "https://ror.org/00qdtba35", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I4210094879", "https://openalex.org/I4210142748"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Tao Lu", "raw_affiliation_strings": ["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210094879", "https://openalex.org/I19820366"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5100431163", "display_name": "Rui Wang", "orcid": "https://orcid.org/0000-0001-9048-2979"}, "institutions": [{"id": "https://openalex.org/I19820366", "display_name": "Chinese Academy of Sciences", "ror": "https://ror.org/034t30j35", "country_code": "CN", "type": "government", "lineage": ["https://openalex.org/I19820366"]}, {"id": "https://openalex.org/I4210094879", "display_name": "Shandong Institute of Automation", "ror": "https://ror.org/00qdtba35", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I4210094879", "https://openalex.org/I4210142748"]}, {"id": "https://openalex.org/I4210145761", "display_name": "Shenzhen Institutes of Advanced Technology", "ror": "https://ror.org/04gh4er46", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I19820366", "https://openalex.org/I4210145761"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Rui Wang", "raw_affiliation_strings": ["Guangdong Provincial Key Lab of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology Chinese Academy of Sciences, Shenzhen 518055, China", "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Guangdong Provincial Key Lab of Robotics and Intelligent System, Shenzhen Institutes of Advanced Technology Chinese Academy of Sciences, Shenzhen 518055, China", "institution_ids": ["https://openalex.org/I4210145761", "https://openalex.org/I19820366"]}, {"raw_affiliation_string": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210094879", "https://openalex.org/I19820366"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5100639215", "display_name": "Shuo Wang", "orcid": "https://orcid.org/0000-0002-1390-9219"}, "institutions": [{"id": "https://openalex.org/I19820366", "display_name": "Chinese Academy of Sciences", "ror": "https://ror.org/034t30j35", "country_code": "CN", "type": "government", "lineage": ["https://openalex.org/I19820366"]}, {"id": "https://openalex.org/I4210094879", "display_name": "Shandong Institute of Automation", "ror": "https://ror.org/00qdtba35", "country_code": "CN", "type": "facility", "lineage": ["https://openalex.org/I4210094879", "https://openalex.org/I4210142748"]}, {"id": "https://openalex.org/I4210097554", "display_name": "Center for Excellence in Brain Science and Intelligence Technology", "ror": "https://ror.org/00vpwhm04", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I19820366", "https://openalex.org/I4210097554"]}, {"id": "https://openalex.org/I4210165038", "display_name": "University of Chinese Academy of Sciences", "ror": "https://ror.org/05qbk4x57", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I19820366", "https://openalex.org/I4210165038"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Shuo Wang", "raw_affiliation_strings": ["Center for Excellence in Brain Science and Intelligence Technology of the Chinese Academy of Sciences, Shanghai 200031, China", "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "University of Chinese Academy of Sciences, Beijing 100190, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Center for Excellence in Brain Science and Intelligence Technology of the Chinese Academy of Sciences, Shanghai 200031, China", "institution_ids": ["https://openalex.org/I4210097554", "https://openalex.org/I19820366"]}, {"raw_affiliation_string": "State Key Laboratory of Management and Control for Complex Systems, Institute of Automation Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210094879", "https://openalex.org/I19820366"]}, {"raw_affiliation_string": "University of Chinese Academy of Sciences, Beijing 100190, China", "institution_ids": ["https://openalex.org/I4210165038"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 5, "corresponding_author_ids": ["https://openalex.org/A5100639215", "https://openalex.org/A5103021731", "https://openalex.org/A5111683010"], "corresponding_institution_ids": ["https://openalex.org/I19820366", "https://openalex.org/I4210094879", "https://openalex.org/I4210097554", "https://openalex.org/I4210165038"], "apc_list": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "apc_paid": {"value": 2300, "currency": "CHF", "value_usd": 2490}, "fwci": 2.312, "has_fulltext": true, "cited_by_count": 29, "citation_normalized_percentile": {"value": 0.905466, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 99}, "biblio": {"volume": "10", "issue": "5", "first_page": "1555", "last_page": "1555"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9995999932289124, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9993000030517578, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9721999764442444, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7102282643318176}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6292997598648071}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5742224454879761}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.5189782977104187}, {"id": "https://openalex.org/keywords/transfer-of-learning", "display_name": "Transfer of learning", "score": 0.5132951140403748}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.49127906560897827}, {"id": "https://openalex.org/keywords/real-world-data", "display_name": "Real world data", "score": 0.47591620683670044}, {"id": "https://openalex.org/keywords/inference", "display_name": "Inference", "score": 0.4739373028278351}, {"id": "https://openalex.org/keywords/task", "display_name": "Task (project management)", "score": 0.44619306921958923}, {"id": "https://openalex.org/keywords/generalization", "display_name": "Generalization", "score": 0.4429875612258911}, {"id": "https://openalex.org/keywords/negative-transfer", "display_name": "Negative transfer", "score": 0.43215373158454895}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4057791233062744}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.2104373276233673}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7102282643318176}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6292997598648071}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5742224454879761}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.5189782977104187}, {"id": "https://openalex.org/C150899416", "wikidata": "https://www.wikidata.org/wiki/Q1820378", "display_name": "Transfer of learning", "level": 2, "score": 0.5132951140403748}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.49127906560897827}, {"id": "https://openalex.org/C3020493868", "wikidata": "https://www.wikidata.org/wiki/Q55631277", "display_name": "Real world data", "level": 2, "score": 0.47591620683670044}, {"id": "https://openalex.org/C2776214188", "wikidata": "https://www.wikidata.org/wiki/Q408386", "display_name": "Inference", "level": 2, "score": 0.4739373028278351}, {"id": "https://openalex.org/C2780451532", "wikidata": "https://www.wikidata.org/wiki/Q759676", "display_name": "Task (project management)", "level": 2, "score": 0.44619306921958923}, {"id": "https://openalex.org/C177148314", "wikidata": "https://www.wikidata.org/wiki/Q170084", "display_name": "Generalization", "level": 2, "score": 0.4429875612258911}, {"id": "https://openalex.org/C2779178101", "wikidata": "https://www.wikidata.org/wiki/Q6987274", "display_name": "Negative transfer", "level": 3, "score": 0.43215373158454895}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4057791233062744}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.2104373276233673}, {"id": "https://openalex.org/C41895202", "wikidata": "https://www.wikidata.org/wiki/Q8162", "display_name": "Linguistics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C171041071", "wikidata": "https://www.wikidata.org/wiki/Q36870", "display_name": "First language", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C2522767166", "wikidata": "https://www.wikidata.org/wiki/Q2374463", "display_name": "Data science", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C138885662", "wikidata": "https://www.wikidata.org/wiki/Q5891", "display_name": "Philosophy", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C134306372", "wikidata": "https://www.wikidata.org/wiki/Q7754", "display_name": "Mathematical analysis", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C201995342", "wikidata": "https://www.wikidata.org/wiki/Q682496", "display_name": "Systems engineering", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 3, "locations": [{"id": "doi:10.3390/app10051555", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app10051555", "pdf_url": "https://www.mdpi.com/2076-3417/10/5/1555/pdf?version=1583318438", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, {"id": "pmh:oai:doaj.org/article:999c6c3f45c441c88e3cd95711828acb", "is_oa": true, "landing_page_url": "https://doaj.org/article/999c6c3f45c441c88e3cd95711828acb", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences, Vol 10, Iss 5, p 1555 (2020)", "raw_type": "article"}, {"id": "pmh:oai:mdpi.com:/2076-3417/10/5/1555/", "is_oa": true, "landing_page_url": "http://dx.doi.org/10.3390/app10051555", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400947", "display_name": "MDPI (MDPI AG)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I4210097602", "host_organization_name": "Multidisciplinary Digital Publishing Institute (Switzerland)", "host_organization_lineage": ["https://openalex.org/I4210097602"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Applied Sciences", "raw_type": "Text"}], "best_oa_location": {"id": "doi:10.3390/app10051555", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/app10051555", "pdf_url": "https://www.mdpi.com/2076-3417/10/5/1555/pdf?version=1583318438", "source": {"id": "https://openalex.org/S4210205812", "display_name": "Applied Sciences", "issn_l": "2076-3417", "issn": ["2076-3417"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Applied Sciences", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G3511765009", "display_name": null, "funder_award_id": "Grant Nos. 61773378, U1713222, and U1806204", "funder_id": "https://openalex.org/F4320321001", "funder_display_name": "National Natural Science Foundation of China"}, {"id": "https://openalex.org/G4323752568", "display_name": null, "funder_award_id": "U1713222", "funder_id": "https://openalex.org/F4320321001", "funder_display_name": "National Natural Science Foundation of China"}, {"id": "https://openalex.org/G4525805536", "display_name": null, "funder_award_id": "U1806204", "funder_id": "https://openalex.org/F4320321001", "funder_display_name": "National Natural Science Foundation of China"}, {"id": "https://openalex.org/G8619362520", "display_name": null, "funder_award_id": "61773378", "funder_id": "https://openalex.org/F4320321001", "funder_display_name": "National Natural Science Foundation of China"}], "funders": [{"id": "https://openalex.org/F4320321001", "display_name": "National Natural Science Foundation of China", "ror": "https://ror.org/01h0zpd94"}], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W3008492644.pdf", "grobid_xml": "https://content.openalex.org/works/W3008492644.grobid-xml"}, "referenced_works_count": 47, "referenced_works": ["https://openalex.org/W1484499605", "https://openalex.org/W1587009522", "https://openalex.org/W1686810756", "https://openalex.org/W1977655452", "https://openalex.org/W1980969546", "https://openalex.org/W2036637075", "https://openalex.org/W2060914855", "https://openalex.org/W2064355448", "https://openalex.org/W2088219864", "https://openalex.org/W2121615981", "https://openalex.org/W2124267516", "https://openalex.org/W2145339207", "https://openalex.org/W2154543878", "https://openalex.org/W2155007355", "https://openalex.org/W2156219259", "https://openalex.org/W2157072005", "https://openalex.org/W2158782408", "https://openalex.org/W2201912979", "https://openalex.org/W2260756217", "https://openalex.org/W2395611524", "https://openalex.org/W2521863123", "https://openalex.org/W2600030077", "https://openalex.org/W2605102758", "https://openalex.org/W2737215781", "https://openalex.org/W2745868649", "https://openalex.org/W2766447205", "https://openalex.org/W2767050701", "https://openalex.org/W2770085058", "https://openalex.org/W2962736495", "https://openalex.org/W2962837436", "https://openalex.org/W2962894046", "https://openalex.org/W2962899390", "https://openalex.org/W2962957005", "https://openalex.org/W2963027561", "https://openalex.org/W2963184124", "https://openalex.org/W2963669336", "https://openalex.org/W2963713397", "https://openalex.org/W2964161785", "https://openalex.org/W3100789280", "https://openalex.org/W3101442004", "https://openalex.org/W3112422759", "https://openalex.org/W4214717370", "https://openalex.org/W6638018090", "https://openalex.org/W6672881665", "https://openalex.org/W6682849425", "https://openalex.org/W6684205842", "https://openalex.org/W6728925229"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W3162204513", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W2031695474", "https://openalex.org/W2398668521", "https://openalex.org/W2619137770", "https://openalex.org/W2548360214", "https://openalex.org/W2952841984"], "abstract_inverted_index": {"Compared": [0], "to": [1, 16, 20, 44, 55, 60, 80, 163], "traditional": [2], "data-driven": [3], "learning": [4, 10, 28], "methods,": [5], "recently": [6], "developed": [7], "deep": [8], "reinforcement": [9], "(DRL)": [11], "approaches": [12], "can": [13, 188], "be": [14, 77], "employed": [15], "train": [17, 45, 189], "robot": [18, 166, 179], "agents": [19], "obtain": [21], "control": [22, 29, 164, 180], "policies": [23, 30, 46, 54, 70, 191], "with": [24, 143, 192], "appealing": [25], "performance.": [26], "However,": [27], "for": [31], "real-world": [32, 56, 67, 132, 168, 172], "robots": [33], "through": [34], "DRL": [35, 145], "is": [36, 43, 88, 124, 141, 160], "costly": [37], "and": [38, 50, 66, 110, 134, 137, 196], "cumbersome.": [39], "A": [40], "promising": [41], "alternative": [42], "in": [47, 72, 147, 167, 176], "simulated": [48, 65, 73, 122, 150], "environments": [49, 74], "transfer": [51, 102], "the": [52, 61, 69, 81, 85, 116, 131, 144, 148, 153, 157, 165, 184], "learned": [53, 71, 158], "scenarios.": [57], "Unfortunately,": [58], "due": [59], "reality": [62, 86], "gap": [63, 87], "between": [64], "environments,": [68], "often": [75], "cannot": [76], "generalized": [78], "well": [79], "real": [82], "world.": [83], "Bridging": [84], "still": [89], "a": [90, 98, 106, 111, 120, 139], "challenging": [91], "problem.": [92], "In": [93, 115, 152], "this": [94], "paper,": [95], "we": [96], "propose": [97], "novel": [99], "real\u2013sim\u2013real": [100], "(RSR)": [101], "method": [103, 146, 187], "that": [104, 183], "includes": [105], "real-to-sim": [107, 117], "training": [108, 118, 199], "phase": [109], "sim-to-real": [112, 154], "inference": [113, 155], "phase.": [114], "phase,": [119, 156], "task-relevant": [121], "environment": [123], "constructed": [125], "based": [126], "on": [127], "semantic": [128], "information": [129], "of": [130], "scenario": [133], "coordinate": [135], "transformation,": [136], "then": [138], "policy": [140, 159], "trained": [142], "built": [149], "environment.": [151], "directly": [161], "applied": [162], "scenarios": [169], "without": [170], "any": [171], "data.": [173], "Experimental": [174], "results": [175], "two": [177], "different": [178], "tasks": [181], "show": [182], "proposed": [185], "RSR": [186], "skill": [190], "high": [193], "generalization": [194], "performance": [195], "significantly": [197], "low": [198], "costs.": [200]}, "counts_by_year": [{"year": 2026, "cited_by_count": 2}, {"year": 2025, "cited_by_count": 3}, {"year": 2024, "cited_by_count": 7}, {"year": 2023, "cited_by_count": 5}, {"year": 2022, "cited_by_count": 4}, {"year": 2021, "cited_by_count": 8}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4388911926", "doi": "https://doi.org/10.1007/s10846-023-02003-0", "title": "A Reinforcement Learning Approach for Continuum Robot Control", "display_name": "A Reinforcement Learning Approach for Continuum Robot Control", "relevance_score": 191.3816, "publication_year": 2023, "publication_date": "2023-11-22", "ids": {"openalex": "https://openalex.org/W4388911926", "doi": "https://doi.org/10.1007/s10846-023-02003-0"}, "language": "en", "primary_location": {"id": "doi:10.1007/s10846-023-02003-0", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10846-023-02003-0", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10846-023-02003-0.pdf", "source": {"id": "https://openalex.org/S91329792", "display_name": "Journal of Intelligent & Robotic Systems", "issn_l": "0921-0296", "issn": ["0921-0296", "1573-0409"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Intelligent &amp; Robotic Systems", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "hybrid", "oa_url": "https://link.springer.com/content/pdf/10.1007/s10846-023-02003-0.pdf", "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5092349081", "display_name": "Turhan Can Kargin", "orcid": "https://orcid.org/0000-0002-6751-4773"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Turhan Can Kargin", "raw_affiliation_strings": ["Izmir, Turkey"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Izmir, Turkey", "institution_ids": []}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5080388958", "display_name": "Jakub Ko\u0142ota", "orcid": "https://orcid.org/0000-0002-2177-1555"}, "institutions": [{"id": "https://openalex.org/I46597724", "display_name": "Pozna\u0144 University of Technology", "ror": "https://ror.org/00p7p3302", "country_code": "PL", "type": "education", "lineage": ["https://openalex.org/I46597724"]}], "countries": ["PL"], "is_corresponding": true, "raw_author_name": "Jakub Ko\u0142ota", "raw_affiliation_strings": ["Poznan University of Technology, Institute of Automatic Control and Robotics, Piotrowo 3A, Poznan, 60-965, Poland"], "raw_orcid": "https://orcid.org/0000-0002-2177-1555", "affiliations": [{"raw_affiliation_string": "Poznan University of Technology, Institute of Automatic Control and Robotics, Piotrowo 3A, Poznan, 60-965, Poland", "institution_ids": ["https://openalex.org/I46597724"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5080388958"], "corresponding_institution_ids": ["https://openalex.org/I46597724"], "apc_list": {"value": 2390, "currency": "EUR", "value_usd": 2990}, "apc_paid": {"value": 2390, "currency": "EUR", "value_usd": 2990}, "fwci": 2.0919, "has_fulltext": true, "cited_by_count": 20, "citation_normalized_percentile": {"value": 0.86840134, "is_in_top_1_percent": false, "is_in_top_10_percent": false}, "cited_by_percentile_year": {"min": 95, "max": 99}, "biblio": {"volume": "109", "issue": "4", "first_page": null, "last_page": null}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10868", "display_name": "Soft Robotics and Applications", "score": 0.9994999766349792, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10675", "display_name": "Mechanical Circulatory Support Devices", "score": 0.9707000255584717, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11023", "display_name": "Prosthetics and Rehabilitation Robotics", "score": 0.9628000259399414, "subfield": {"id": "https://openalex.org/subfields/2204", "display_name": "Biomedical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7548692226409912}, {"id": "https://openalex.org/keywords/kinematics", "display_name": "Kinematics", "score": 0.7046644687652588}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6768251657485962}, {"id": "https://openalex.org/keywords/planar", "display_name": "Planar", "score": 0.5207188725471497}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5094462037086487}, {"id": "https://openalex.org/keywords/curvature", "display_name": "Curvature", "score": 0.4762803912162781}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.4272230863571167}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.4260013997554779}, {"id": "https://openalex.org/keywords/control-theory", "display_name": "Control theory (sociology)", "score": 0.42077505588531494}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.20726802945137024}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.19595161080360413}, {"id": "https://openalex.org/keywords/physics", "display_name": "Physics", "score": 0.12559151649475098}, {"id": "https://openalex.org/keywords/classical-mechanics", "display_name": "Classical mechanics", "score": 0.1078442633152008}, {"id": "https://openalex.org/keywords/geometry", "display_name": "Geometry", "score": 0.10361593961715698}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7548692226409912}, {"id": "https://openalex.org/C39920418", "wikidata": "https://www.wikidata.org/wiki/Q11476", "display_name": "Kinematics", "level": 2, "score": 0.7046644687652588}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6768251657485962}, {"id": "https://openalex.org/C134786449", "wikidata": "https://www.wikidata.org/wiki/Q3391255", "display_name": "Planar", "level": 2, "score": 0.5207188725471497}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5094462037086487}, {"id": "https://openalex.org/C195065555", "wikidata": "https://www.wikidata.org/wiki/Q214881", "display_name": "Curvature", "level": 2, "score": 0.4762803912162781}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.4272230863571167}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.4260013997554779}, {"id": "https://openalex.org/C47446073", "wikidata": "https://www.wikidata.org/wiki/Q5165890", "display_name": "Control theory (sociology)", "level": 3, "score": 0.42077505588531494}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.20726802945137024}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.19595161080360413}, {"id": "https://openalex.org/C121332964", "wikidata": "https://www.wikidata.org/wiki/Q413", "display_name": "Physics", "level": 0, "score": 0.12559151649475098}, {"id": "https://openalex.org/C74650414", "wikidata": "https://www.wikidata.org/wiki/Q11397", "display_name": "Classical mechanics", "level": 1, "score": 0.1078442633152008}, {"id": "https://openalex.org/C2524010", "wikidata": "https://www.wikidata.org/wiki/Q8087", "display_name": "Geometry", "level": 1, "score": 0.10361593961715698}, {"id": "https://openalex.org/C121684516", "wikidata": "https://www.wikidata.org/wiki/Q7600677", "display_name": "Computer graphics (images)", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1007/s10846-023-02003-0", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10846-023-02003-0", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10846-023-02003-0.pdf", "source": {"id": "https://openalex.org/S91329792", "display_name": "Journal of Intelligent & Robotic Systems", "issn_l": "0921-0296", "issn": ["0921-0296", "1573-0409"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Intelligent &amp; Robotic Systems", "raw_type": "journal-article"}], "best_oa_location": {"id": "doi:10.1007/s10846-023-02003-0", "is_oa": true, "landing_page_url": "https://doi.org/10.1007/s10846-023-02003-0", "pdf_url": "https://link.springer.com/content/pdf/10.1007/s10846-023-02003-0.pdf", "source": {"id": "https://openalex.org/S91329792", "display_name": "Journal of Intelligent & Robotic Systems", "issn_l": "0921-0296", "issn": ["0921-0296", "1573-0409"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Journal of Intelligent &amp; Robotic Systems", "raw_type": "journal-article"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.800000011920929, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [], "has_content": {"pdf": true, "grobid_xml": false}, "content_urls": {"pdf": "https://content.openalex.org/works/W4388911926.pdf"}, "referenced_works_count": 32, "referenced_works": ["https://openalex.org/W1891615552", "https://openalex.org/W1965456896", "https://openalex.org/W1989099984", "https://openalex.org/W1995071895", "https://openalex.org/W1997060717", "https://openalex.org/W2063797945", "https://openalex.org/W2101667962", "https://openalex.org/W2117162300", "https://openalex.org/W2145339207", "https://openalex.org/W2184993170", "https://openalex.org/W2257979135", "https://openalex.org/W2344786740", "https://openalex.org/W2511924737", "https://openalex.org/W2574978968", "https://openalex.org/W2609009256", "https://openalex.org/W2741375793", "https://openalex.org/W2775361682", "https://openalex.org/W2901112449", "https://openalex.org/W2933207051", "https://openalex.org/W2955790965", "https://openalex.org/W2962851396", "https://openalex.org/W2962977206", "https://openalex.org/W2964198579", "https://openalex.org/W2977992650", "https://openalex.org/W2994209110", "https://openalex.org/W3032958018", "https://openalex.org/W3094290217", "https://openalex.org/W3127964809", "https://openalex.org/W3204719635", "https://openalex.org/W6600062020", "https://openalex.org/W6603732165", "https://openalex.org/W6702088316"], "related_works": ["https://openalex.org/W2789522126", "https://openalex.org/W2066693961", "https://openalex.org/W2368363778", "https://openalex.org/W122584421", "https://openalex.org/W4244295168", "https://openalex.org/W2753351751", "https://openalex.org/W3185180338", "https://openalex.org/W2889348933", "https://openalex.org/W2351643838", "https://openalex.org/W2978007058"], "abstract_inverted_index": {"Abstract": [0], "Rigid": [1], "joint": [2], "manipulators": [3, 41], "are": [4], "limited": [5], "in": [6, 111, 144, 192, 200, 258, 280], "their": [7], "movement": [8, 25], "and": [9, 26, 45, 56, 103, 273], "degrees": [10], "of": [11, 52, 70, 129, 176, 195, 203, 209, 215, 237, 270, 277], "freedom": [12], "(DOF),": [13], "while": [14], "continuum": [15, 64, 118, 158, 196, 271], "robots": [16, 30, 272], "possess": [17], "a": [18, 35, 53, 75, 135, 187], "continuous": [19, 145], "backbone": [20], "that": [21, 138, 155], "allows": [22], "for": [23, 89, 116, 141, 190, 267], "free": [24], "multiple": [27], "DOF.": [28], "Continuum": [29], "move": [31, 162], "by": [32], "bending": [33], "over": [34], "section,": [36], "taking": [37], "inspiration": [38], "from": [39, 163], "biological": [40], "such": [42, 218, 241], "as": [43, 74, 219, 242], "tentacles": [44], "trunks.": [46], "This": [47, 94], "paper": [48, 263], "presents": [49], "an": [50], "implementation": [51, 102], "forward": [54, 91], "kinematics": [55, 58], "velocity": [57], "model": [59], "to": [60, 99, 106, 167, 185, 224, 229, 255], "describe": [61], "the": [62, 68, 84, 90, 108, 112, 127, 150, 156, 173, 177, 181, 193, 201, 213, 226, 235, 259, 265, 275], "planar": [63, 85, 117, 157], "robot,": [65], "along": [66], "with": [67], "application": [69, 202], "reinforcement": [71], "learning": [72, 142], "(RL)": [73], "control": [76, 115, 122, 269], "algorithm.": [77], "In": [78], "this": [79, 262, 281], "paper,": [80], "we": [81, 183], "have": [82], "adopted": [83], "constant": [86], "curvature": [87], "representation": [88], "kinematic": [92], "modeling.": [93], "choice": [95], "was": [96, 153], "made": [97], "due": [98], "its": [100, 104], "straightforward": [101], "potential": [105, 207, 266], "fill": [107], "literature": [109], "gap": [110], "field": [113, 194], "RL-based": [114, 268], "robots.": [119], "The": [120], "intended": [121], "mechanism": [123], "is": [124, 139], "achieved": [125], "through": [126], "use": [128, 236], "Deep": [130], "Deterministic": [131], "Policy": [132, 244, 250], "Gradient": [133], "(DDPG),": [134], "RL": [136, 204, 239], "algorithm": [137], "suited": [140], "controls": [143], "action": [146], "spaces.": [147], "After": [148], "simulating": [149], "algorithm,": [151], "it": [152], "observed": [154], "robot": [159, 197], "can": [160], "autonomously": [161], "any": [164, 168], "initial": [165], "point": [166, 171], "desired": [169], "goal": [170], "within": [172], "task": [174], "space": [175], "robot.": [178], "By": [179], "analyzing": [180], "results,": [182], "wanted": [184], "recommend": [186], "future": [188], "direction": [189], "research": [191, 279], "control,": [198], "specifically": [199], "algorithms.": [205], "One": [206], "area": [208], "focus": [210], "could": [211, 253], "be": [212], "integration": [214], "sensory": [216], "feedback,": [217], "vision": [220], "or": [221, 247], "force": [222], "sensing,": [223], "improve": [225], "robot\u2019s": [227], "ability": [228], "navigate": [230], "complex": [231], "environments.": [232], "Additionally,": [233], "exploring": [234], "different": [238], "algorithms,": [240], "Proximal": [243], "Optimization": [245, 251], "(PPO)": [246], "Trust": [248], "Region": [249], "(TRPO),": [252], "lead": [254], "further": [256], "advancements": [257], "field.": [260], "Overall,": [261], "demonstrates": [264], "highlights": [274], "importance": [276], "continued": [278], "area.": [282]}, "counts_by_year": [{"year": 2026, "cited_by_count": 1}, {"year": 2025, "cited_by_count": 12}, {"year": 2024, "cited_by_count": 7}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3128707967", "doi": "https://doi.org/10.3390/s21041278", "title": "Learning for a Robot: Deep Reinforcement Learning, Imitation Learning, Transfer Learning", "display_name": "Learning for a Robot: Deep Reinforcement Learning, Imitation Learning, Transfer Learning", "relevance_score": 189.86139, "publication_year": 2021, "publication_date": "2021-02-11", "ids": {"openalex": "https://openalex.org/W3128707967", "doi": "https://doi.org/10.3390/s21041278", "mag": "3128707967", "pmid": "https://pubmed.ncbi.nlm.nih.gov/33670109"}, "language": "en", "primary_location": {"id": "doi:10.3390/s21041278", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/s21041278", "pdf_url": "https://www.mdpi.com/1424-8220/21/4/1278/pdf?version=1613722477", "source": {"id": "https://openalex.org/S101949793", "display_name": "Sensors", "issn_l": "1424-8220", "issn": ["1424-8220"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Sensors", "raw_type": "journal-article"}, "type": "review", "indexed_in": ["crossref", "doaj", "pubmed"], "open_access": {"is_oa": true, "oa_status": "gold", "oa_url": "https://www.mdpi.com/1424-8220/21/4/1278/pdf?version=1613722477", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": null, "display_name": "Jiang Hua", "orcid": null}, "institutions": [{"id": "https://openalex.org/I43922553", "display_name": "Wuhan University of Science and Technology", "ror": "https://ror.org/00e4hrk88", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I43922553"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Jiang Hua", "raw_affiliation_strings": ["Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China", "institution_ids": ["https://openalex.org/I43922553"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5112917934", "display_name": "Liangcai Zeng", "orcid": null}, "institutions": [{"id": "https://openalex.org/I43922553", "display_name": "Wuhan University of Science and Technology", "ror": "https://ror.org/00e4hrk88", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I43922553"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Liangcai Zeng", "raw_affiliation_strings": ["Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China", "institution_ids": ["https://openalex.org/I43922553"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5082414273", "display_name": "Gongfa Li", "orcid": "https://orcid.org/0000-0002-2695-2742"}, "institutions": [{"id": "https://openalex.org/I43922553", "display_name": "Wuhan University of Science and Technology", "ror": "https://ror.org/00e4hrk88", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I43922553"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Gongfa Li", "raw_affiliation_strings": ["Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China"], "raw_orcid": "https://orcid.org/0000-0002-2695-2742", "affiliations": [{"raw_affiliation_string": "Key Laboratory of Metallurgical Equipment and Control Technology, Ministry of Education, Wuhan University of Science and Technology, Wuhan 430081, China", "institution_ids": ["https://openalex.org/I43922553"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5056287221", "display_name": "Zhaojie Ju", "orcid": "https://orcid.org/0000-0002-9524-7609"}, "institutions": [{"id": "https://openalex.org/I63072094", "display_name": "University of Portsmouth", "ror": "https://ror.org/03ykbk197", "country_code": "GB", "type": "education", "lineage": ["https://openalex.org/I63072094"]}], "countries": ["GB"], "is_corresponding": true, "raw_author_name": "Zhaojie Ju", "raw_affiliation_strings": ["School of Computing, University of Portsmouth, Portsmouth 03801, UK"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "School of Computing, University of Portsmouth, Portsmouth 03801, UK", "institution_ids": ["https://openalex.org/I63072094"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 4, "corresponding_author_ids": ["https://openalex.org/A5056287221"], "corresponding_institution_ids": ["https://openalex.org/I63072094"], "apc_list": {"value": 2400, "currency": "CHF", "value_usd": 2598}, "apc_paid": {"value": 2400, "currency": "CHF", "value_usd": 2598}, "fwci": 22.3427, "has_fulltext": true, "cited_by_count": 220, "citation_normalized_percentile": {"value": 0.99821685, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 98, "max": 100}, "biblio": {"volume": "21", "issue": "4", "first_page": "1278", "last_page": "1278"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9986000061035156, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12784", "display_name": "Modular Robots and Swarm Intelligence", "score": 0.987500011920929, "subfield": {"id": "https://openalex.org/subfields/2210", "display_name": "Mechanical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7718876600265503}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.6909251809120178}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6601171493530273}, {"id": "https://openalex.org/keywords/robot-learning", "display_name": "Robot learning", "score": 0.6515454053878784}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6104543805122375}, {"id": "https://openalex.org/keywords/transfer-of-learning", "display_name": "Transfer of learning", "score": 0.5348682999610901}, {"id": "https://openalex.org/keywords/imitation", "display_name": "Imitation", "score": 0.4907444715499878}, {"id": "https://openalex.org/keywords/human\u2013computer-interaction", "display_name": "Human\u2013computer interaction", "score": 0.3921035826206207}, {"id": "https://openalex.org/keywords/engineering", "display_name": "Engineering", "score": 0.3291204571723938}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.19414031505584717}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7718876600265503}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.6909251809120178}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6601171493530273}, {"id": "https://openalex.org/C188888258", "wikidata": "https://www.wikidata.org/wiki/Q7353390", "display_name": "Robot learning", "level": 4, "score": 0.6515454053878784}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6104543805122375}, {"id": "https://openalex.org/C150899416", "wikidata": "https://www.wikidata.org/wiki/Q1820378", "display_name": "Transfer of learning", "level": 2, "score": 0.5348682999610901}, {"id": "https://openalex.org/C126388530", "wikidata": "https://www.wikidata.org/wiki/Q1131737", "display_name": "Imitation", "level": 2, "score": 0.4907444715499878}, {"id": "https://openalex.org/C107457646", "wikidata": "https://www.wikidata.org/wiki/Q207434", "display_name": "Human\u2013computer interaction", "level": 1, "score": 0.3921035826206207}, {"id": "https://openalex.org/C127413603", "wikidata": "https://www.wikidata.org/wiki/Q11023", "display_name": "Engineering", "level": 0, "score": 0.3291204571723938}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.19414031505584717}, {"id": "https://openalex.org/C15744967", "wikidata": "https://www.wikidata.org/wiki/Q9418", "display_name": "Psychology", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C77805123", "wikidata": "https://www.wikidata.org/wiki/Q161272", "display_name": "Social psychology", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 6, "locations": [{"id": "doi:10.3390/s21041278", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/s21041278", "pdf_url": "https://www.mdpi.com/1424-8220/21/4/1278/pdf?version=1613722477", "source": {"id": "https://openalex.org/S101949793", "display_name": "Sensors", "issn_l": "1424-8220", "issn": ["1424-8220"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Sensors", "raw_type": "journal-article"}, {"id": "pmid:33670109", "is_oa": false, "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/33670109", "pdf_url": null, "source": {"id": "https://openalex.org/S4306525036", "display_name": "PubMed", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1299303238", "host_organization_name": "National Institutes of Health", "host_organization_lineage": ["https://openalex.org/I1299303238"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Sensors (Basel, Switzerland)", "raw_type": null}, {"id": "pmh:oai:researchportal.port.ac.uk:publications/e9715e8d-80cb-456f-bc66-1ef32bc96978", "is_oa": true, "landing_page_url": "http://www.scopus.com/inward/record.url?scp=85100642428&partnerID=8YFLogxK", "pdf_url": "https://researchportal.port.ac.uk/portal/en/publications/learning-for-a-robot(e9715e8d-80cb-456f-bc66-1ef32bc96978).html", "source": {"id": "https://openalex.org/S4306401774", "display_name": "Portsmouth Research Portal (University of Portsmouth)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I63072094", "host_organization_name": "University of Portsmouth", "host_organization_lineage": ["https://openalex.org/I63072094"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": ""}, {"id": "pmh:oai:doaj.org/article:107aff1b42cb4716ba1d9a5d4cae0825", "is_oa": true, "landing_page_url": "https://doaj.org/article/107aff1b42cb4716ba1d9a5d4cae0825", "pdf_url": null, "source": {"id": "https://openalex.org/S4306401280", "display_name": "DOAJ (DOAJ: Directory of Open Access Journals)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-sa", "license_id": "https://openalex.org/licenses/cc-by-sa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Sensors, Vol 21, Iss 4, p 1278 (2021)", "raw_type": "article"}, {"id": "pmh:oai:mdpi.com:/1424-8220/21/4/1278/", "is_oa": true, "landing_page_url": "https://dx.doi.org/10.3390/s21041278", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400947", "display_name": "MDPI (MDPI AG)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I4210097602", "host_organization_name": "Multidisciplinary Digital Publishing Institute (Switzerland)", "host_organization_lineage": ["https://openalex.org/I4210097602"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Sensors; Volume 21; Issue 4; Pages: 1278", "raw_type": "Text"}, {"id": "pmh:oai:pubmedcentral.nih.gov:7916895", "is_oa": true, "landing_page_url": "https://www.ncbi.nlm.nih.gov/pmc/articles/7916895", "pdf_url": null, "source": {"id": "https://openalex.org/S2764455111", "display_name": "PubMed Central", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1299303238", "host_organization_name": "National Institutes of Health", "host_organization_lineage": ["https://openalex.org/I1299303238"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Sensors (Basel)", "raw_type": "Text"}], "best_oa_location": {"id": "doi:10.3390/s21041278", "is_oa": true, "landing_page_url": "https://doi.org/10.3390/s21041278", "pdf_url": "https://www.mdpi.com/1424-8220/21/4/1278/pdf?version=1613722477", "source": {"id": "https://openalex.org/S101949793", "display_name": "Sensors", "issn_l": "1424-8220", "issn": ["1424-8220"], "is_oa": true, "is_in_doaj": true, "is_core": true, "host_organization": "https://openalex.org/P4310310987", "host_organization_name": "Multidisciplinary Digital Publishing Institute", "host_organization_lineage": ["https://openalex.org/P4310310987"], "host_organization_lineage_names": ["Multidisciplinary Digital Publishing Institute"], "type": "journal"}, "license": "cc-by", "license_id": "https://openalex.org/licenses/cc-by", "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Sensors", "raw_type": "journal-article"}, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W3128707967.pdf", "grobid_xml": "https://content.openalex.org/works/W3128707967.grobid-xml"}, "referenced_works_count": 162, "referenced_works": ["https://openalex.org/W80905443", "https://openalex.org/W1164749991", "https://openalex.org/W1581800915", "https://openalex.org/W1588269491", "https://openalex.org/W1757796397", "https://openalex.org/W1812075590", "https://openalex.org/W1820657498", "https://openalex.org/W1892339738", "https://openalex.org/W1913984289", "https://openalex.org/W1929981607", "https://openalex.org/W1931877416", "https://openalex.org/W1940481278", "https://openalex.org/W1977655452", "https://openalex.org/W1990603682", "https://openalex.org/W1999156278", "https://openalex.org/W1999874108", "https://openalex.org/W1999985000", "https://openalex.org/W2001095967", "https://openalex.org/W2005824379", "https://openalex.org/W2036637075", "https://openalex.org/W2051620263", "https://openalex.org/W2061562262", "https://openalex.org/W2064878003", "https://openalex.org/W2070678636", "https://openalex.org/W2078763164", "https://openalex.org/W2086240243", "https://openalex.org/W2096001037", "https://openalex.org/W2100401322", "https://openalex.org/W2103254706", "https://openalex.org/W2106008664", "https://openalex.org/W2106456603", "https://openalex.org/W2115166552", "https://openalex.org/W2117675763", "https://openalex.org/W2121615981", "https://openalex.org/W2125612430", "https://openalex.org/W2126909264", "https://openalex.org/W2130301822", "https://openalex.org/W2133068870", "https://openalex.org/W2140135625", "https://openalex.org/W2147032798", "https://openalex.org/W2152166054", "https://openalex.org/W2154543878", "https://openalex.org/W2155007355", "https://openalex.org/W2155027007", "https://openalex.org/W2155289806", "https://openalex.org/W2157719307", "https://openalex.org/W2164126051", "https://openalex.org/W2164474021", "https://openalex.org/W2169187067", "https://openalex.org/W2169498096", "https://openalex.org/W2171868844", "https://openalex.org/W2201912979", "https://openalex.org/W2205950488", "https://openalex.org/W2205969003", "https://openalex.org/W2260756217", "https://openalex.org/W2282481780", "https://openalex.org/W2290104316", "https://openalex.org/W2296594443", "https://openalex.org/W2327520576", "https://openalex.org/W2331138946", "https://openalex.org/W2342840547", "https://openalex.org/W2401592218", "https://openalex.org/W2432717477", "https://openalex.org/W2434014514", "https://openalex.org/W2466175722", "https://openalex.org/W2472819217", "https://openalex.org/W2529658650", "https://openalex.org/W2552241273", "https://openalex.org/W2561907433", "https://openalex.org/W2567015638", "https://openalex.org/W2575705757", "https://openalex.org/W2580495915", "https://openalex.org/W2591957724", "https://openalex.org/W2604763608", "https://openalex.org/W2605368761", "https://openalex.org/W2617317556", "https://openalex.org/W2722424650", "https://openalex.org/W2729615412", "https://openalex.org/W2735089625", "https://openalex.org/W2740210681", "https://openalex.org/W2741122588", "https://openalex.org/W2745868649", "https://openalex.org/W2753160622", "https://openalex.org/W2754876573", "https://openalex.org/W2755546070", "https://openalex.org/W2767050701", "https://openalex.org/W2775954438", "https://openalex.org/W2781726626", "https://openalex.org/W2785506286", "https://openalex.org/W2785962646", "https://openalex.org/W2796290181", "https://openalex.org/W2796864868", "https://openalex.org/W2808844346", "https://openalex.org/W2809139847", "https://openalex.org/W2883140436", "https://openalex.org/W2887919024", "https://openalex.org/W2888838050", "https://openalex.org/W2891746250", "https://openalex.org/W2891765131", "https://openalex.org/W2894779868", "https://openalex.org/W2897873640", "https://openalex.org/W2898035736", "https://openalex.org/W2899059606", "https://openalex.org/W2899460553", "https://openalex.org/W2908261578", "https://openalex.org/W2909711564", "https://openalex.org/W2910718468", "https://openalex.org/W2910913210", "https://openalex.org/W2911087563", "https://openalex.org/W2914824760", "https://openalex.org/W2919115771", "https://openalex.org/W2921035978", "https://openalex.org/W2930863966", "https://openalex.org/W2949608212", "https://openalex.org/W2950069298", "https://openalex.org/W2950471160", "https://openalex.org/W2951775809", "https://openalex.org/W2952629144", "https://openalex.org/W2952842465", "https://openalex.org/W2955035422", "https://openalex.org/W2962736495", "https://openalex.org/W2962887844", "https://openalex.org/W2962957031", "https://openalex.org/W2963276406", "https://openalex.org/W2963669336", "https://openalex.org/W2963703448", "https://openalex.org/W2963713397", "https://openalex.org/W2963775850", "https://openalex.org/W2963802910", "https://openalex.org/W2963864421", "https://openalex.org/W2964043796", "https://openalex.org/W2964055695", "https://openalex.org/W2964161785", "https://openalex.org/W2969277962", "https://openalex.org/W2976205474", "https://openalex.org/W2980820015", "https://openalex.org/W2990747716", "https://openalex.org/W2991535721", "https://openalex.org/W3003533476", "https://openalex.org/W3004712215", "https://openalex.org/W3015960732", "https://openalex.org/W3019235189", "https://openalex.org/W3030981716", "https://openalex.org/W3040683359", "https://openalex.org/W3090600213", "https://openalex.org/W3091318627", "https://openalex.org/W3100789280", "https://openalex.org/W3101442004", "https://openalex.org/W3104515094", "https://openalex.org/W3109467707", "https://openalex.org/W3112422759", "https://openalex.org/W3147742130", "https://openalex.org/W6638018090", "https://openalex.org/W6638750020", "https://openalex.org/W6677555207", "https://openalex.org/W6682849425", "https://openalex.org/W6696324988", "https://openalex.org/W6713055699", "https://openalex.org/W6728925229", "https://openalex.org/W6735944222", "https://openalex.org/W6744275271", "https://openalex.org/W6759312711"], "related_works": ["https://openalex.org/W2930863966", "https://openalex.org/W2126211886", "https://openalex.org/W3153786280", "https://openalex.org/W3127551068", "https://openalex.org/W4220829754", "https://openalex.org/W2165180011", "https://openalex.org/W3165944253", "https://openalex.org/W2351343564", "https://openalex.org/W2170052867", "https://openalex.org/W2171010636"], "abstract_inverted_index": {"Dexterous": [0], "manipulation": [1], "of": [2, 9, 30, 47, 61, 71, 81, 86], "the": [3, 31, 45, 56, 62, 69, 79, 101, 112], "robot": [4, 43, 133], "is": [5], "an": [6, 41], "important": [7], "part": [8], "realizing": [10], "intelligence,": [11, 83], "but": [12], "manipulators": [13], "can": [14], "only": [15], "perform": [16], "simple": [17], "tasks": [18, 117], "such": [19], "as": [20], "sorting": [21], "and": [22, 50, 59, 74, 94, 107, 129, 148, 151], "packing": [23], "in": [24, 76, 92, 104, 132, 137], "a": [25, 37], "structured": [26], "environment.": [27], "In": [28], "view": [29], "existing": [32], "problem,": [33], "this": [34], "paper": [35, 53], "presents": [36], "state-of-the-art": [38], "survey": [39, 98], "on": [40, 68, 143], "intelligent": [42], "with": [44], "capability": [46], "autonomous": [48], "deciding": [49], "learning.": [51], "The": [52, 97], "first": [54], "reviews": [55], "main": [57], "achievements": [58, 141], "research": [60, 87, 103, 153], "robot,": [63], "which": [64], "were": [65], "mainly": [66], "based": [67, 142], "breakthrough": [70], "automatic": [72], "control": [73, 134], "hardware": [75], "mechanics.": [77], "With": [78], "evolution": [80], "artificial": [82], "many": [84], "pieces": [85], "have": [88], "made": [89], "further": [90], "progresses": [91], "adaptive": [93], "robust": [95], "control.": [96], "reveals": [99], "that": [100], "latest": [102], "deep": [105, 124], "learning": [106, 109, 131], "reinforcement": [108, 125], "has": [110], "paved": [111], "way": [113], "for": [114], "highly": [115], "complex": [116], "to": [118], "be": [119], "performed": [120], "by": [121], "robots.": [122], "Furthermore,": [123], "learning,": [126, 128], "imitation": [127], "transfer": [130], "are": [135, 146, 155], "discussed": [136], "detail.": [138], "Finally,": [139], "major": [140], "these": [144], "methods": [145], "summarized": [147], "analyzed": [149], "thoroughly,": [150], "future": [152], "challenges": [154], "proposed.": [156]}, "counts_by_year": [{"year": 2026, "cited_by_count": 12}, {"year": 2025, "cited_by_count": 47}, {"year": 2024, "cited_by_count": 67}, {"year": 2023, "cited_by_count": 43}, {"year": 2022, "cited_by_count": 44}, {"year": 2021, "cited_by_count": 7}], "updated_date": "2026-05-21T09:19:25.381259", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2787666871", "doi": "https://doi.org/10.1016/j.neunet.2018.07.006", "title": "State representation learning for control: An overview", "display_name": "State representation learning for control: An overview", "relevance_score": 189.60963, "publication_year": 2018, "publication_date": "2018-08-04", "ids": {"openalex": "https://openalex.org/W2787666871", "doi": "https://doi.org/10.1016/j.neunet.2018.07.006", "mag": "2787666871", "pmid": "https://pubmed.ncbi.nlm.nih.gov/30268059"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.neunet.2018.07.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neunet.2018.07.006", "pdf_url": null, "source": {"id": "https://openalex.org/S123019304", "display_name": "Neural Networks", "issn_l": "0893-6080", "issn": ["0893-6080", "1879-2782"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural Networks", "raw_type": "journal-article"}, "type": "review", "indexed_in": ["arxiv", "crossref", "pubmed"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "https://arxiv.org/pdf/1802.04181", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5006026661", "display_name": "Timoth\u00e9e Lesort", "orcid": "https://orcid.org/0000-0002-8669-0764"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}, {"id": "https://openalex.org/I4210140930", "display_name": "Thales (France)", "ror": "https://ror.org/04emwm605", "country_code": "FR", "type": "company", "lineage": ["https://openalex.org/I4210140930"]}], "countries": ["FR"], "is_corresponding": true, "raw_author_name": "Timoth\u00e9e Lesort", "raw_affiliation_strings": ["Vision Lab, Thales, Theresis, Palaiseau, France; U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: timothee.lesort@thalesgroup.com", "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Vision Lab, Thales, Theresis, Palaiseau, France; U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: timothee.lesort@thalesgroup.com", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "institution_ids": ["https://openalex.org/I4210140930"]}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5058176171", "display_name": "Natalia D\u00edaz-Rodr\u00edguez", "orcid": "https://orcid.org/0000-0003-3362-9326"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}], "countries": ["FR"], "is_corresponding": false, "raw_author_name": "Natalia D\u00edaz-Rodr\u00edguez", "raw_affiliation_strings": ["U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: natalia.diaz@ensta-paristech.fr", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)"], "raw_orcid": "https://orcid.org/0000-0003-3362-9326", "affiliations": [{"raw_affiliation_string": "U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: natalia.diaz@ensta-paristech.fr", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5112346289", "display_name": "Jean-Frano\u0327is Goudou", "orcid": null}, "institutions": [{"id": "https://openalex.org/I2801356230", "display_name": "Thales (Australia)", "ror": "https://ror.org/00f7vya03", "country_code": "AU", "type": "company", "lineage": ["https://openalex.org/I2801356230", "https://openalex.org/I4210140930"]}, {"id": "https://openalex.org/I4210140930", "display_name": "Thales (France)", "ror": "https://ror.org/04emwm605", "country_code": "FR", "type": "company", "lineage": ["https://openalex.org/I4210140930"]}], "countries": ["AU", "FR"], "is_corresponding": false, "raw_author_name": "Jean-Frano\u0327is Goudou", "raw_affiliation_strings": ["Vision Lab, Thales, Theresis, Palaiseau, France. Electronic address: jean-francois.goudou@thalesgroup.com", "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Vision Lab, Thales, Theresis, Palaiseau, France. Electronic address: jean-francois.goudou@thalesgroup.com", "institution_ids": ["https://openalex.org/I2801356230"]}, {"raw_affiliation_string": "Thales Research and Technology [Palaiseau] (1 Avenue Augustin Fresnel, 91767 Palaiseau cedex - France)", "institution_ids": ["https://openalex.org/I4210140930"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5027931470", "display_name": "David Filliat", "orcid": "https://orcid.org/0000-0002-5739-1618"}, "institutions": [{"id": "https://openalex.org/I201181511", "display_name": "\u00c9cole Nationale Sup\u00e9rieure de Techniques Avanc\u00e9es", "ror": "https://ror.org/0309cs235", "country_code": "FR", "type": "education", "lineage": ["https://openalex.org/I201181511", "https://openalex.org/I4210145102"]}], "countries": ["FR"], "is_corresponding": false, "raw_author_name": "David Filliat", "raw_affiliation_strings": ["U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: david.filliat@ensta.fr", "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "U2IS, ENSTA ParisTech, Inria FLOWERS team, Universite Paris Saclay, Palaiseau, France. Electronic address: david.filliat@ensta.fr", "institution_ids": ["https://openalex.org/I201181511"]}, {"raw_affiliation_string": "Flowers - Flowing Epigenetic Robots and Systems (200, avenue de la Vieille Tour \r\n33405 Talence cedex - France)", "institution_ids": []}, {"raw_affiliation_string": "U2IS - Unit\u00e9 d'Informatique et d'Ing\u00e9nierie des Syst\u00e8mes (828, boulevard des Mar\u00e9chaux, 91762 Palaiseau CEDEX - France)", "institution_ids": []}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 4, "corresponding_author_ids": ["https://openalex.org/A5006026661"], "corresponding_institution_ids": ["https://openalex.org/I201181511", "https://openalex.org/I4210140930"], "apc_list": {"value": 3350, "currency": "USD", "value_usd": 3350}, "apc_paid": null, "fwci": 25.2374, "has_fulltext": false, "cited_by_count": 294, "citation_normalized_percentile": {"value": 0.9954728, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 99, "max": 100}, "biblio": {"volume": "108", "issue": null, "first_page": "379", "last_page": "392"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9976999759674072, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9976999759674072, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12072", "display_name": "Machine Learning and Algorithms", "score": 0.9930999875068665, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11307", "display_name": "Domain Adaptation and Few-Shot Learning", "score": 0.9909999966621399, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/representation", "display_name": "Representation (politics)", "score": 0.7435513734817505}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7278826832771301}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7181282043457031}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.7168611288070679}, {"id": "https://openalex.org/keywords/curse-of-dimensionality", "display_name": "Curse of dimensionality", "score": 0.6658106446266174}, {"id": "https://openalex.org/keywords/feature-learning", "display_name": "Feature learning", "score": 0.5952423810958862}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.5883178114891052}, {"id": "https://openalex.org/keywords/dimension", "display_name": "Dimension (graph theory)", "score": 0.5524753332138062}, {"id": "https://openalex.org/keywords/external-data-representation", "display_name": "External Data Representation", "score": 0.511781632900238}, {"id": "https://openalex.org/keywords/state", "display_name": "State (computer science)", "score": 0.47414276003837585}, {"id": "https://openalex.org/keywords/implementation", "display_name": "Implementation", "score": 0.47349199652671814}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4460316300392151}, {"id": "https://openalex.org/keywords/robotics", "display_name": "Robotics", "score": 0.4317273795604706}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.2498774528503418}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.08870789408683777}, {"id": "https://openalex.org/keywords/algorithm", "display_name": "Algorithm", "score": 0.0863552987575531}, {"id": "https://openalex.org/keywords/software-engineering", "display_name": "Software engineering", "score": 0.0806734561920166}], "concepts": [{"id": "https://openalex.org/C2776359362", "wikidata": "https://www.wikidata.org/wiki/Q2145286", "display_name": "Representation (politics)", "level": 3, "score": 0.7435513734817505}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7278826832771301}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7181282043457031}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.7168611288070679}, {"id": "https://openalex.org/C111030470", "wikidata": "https://www.wikidata.org/wiki/Q1430460", "display_name": "Curse of dimensionality", "level": 2, "score": 0.6658106446266174}, {"id": "https://openalex.org/C59404180", "wikidata": "https://www.wikidata.org/wiki/Q17013334", "display_name": "Feature learning", "level": 2, "score": 0.5952423810958862}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.5883178114891052}, {"id": "https://openalex.org/C33676613", "wikidata": "https://www.wikidata.org/wiki/Q13415176", "display_name": "Dimension (graph theory)", "level": 2, "score": 0.5524753332138062}, {"id": "https://openalex.org/C116409475", "wikidata": "https://www.wikidata.org/wiki/Q1385056", "display_name": "External Data Representation", "level": 2, "score": 0.511781632900238}, {"id": "https://openalex.org/C48103436", "wikidata": "https://www.wikidata.org/wiki/Q599031", "display_name": "State (computer science)", "level": 2, "score": 0.47414276003837585}, {"id": "https://openalex.org/C26713055", "wikidata": "https://www.wikidata.org/wiki/Q245962", "display_name": "Implementation", "level": 2, "score": 0.47349199652671814}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4460316300392151}, {"id": "https://openalex.org/C34413123", "wikidata": "https://www.wikidata.org/wiki/Q170978", "display_name": "Robotics", "level": 3, "score": 0.4317273795604706}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.2498774528503418}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.08870789408683777}, {"id": "https://openalex.org/C11413529", "wikidata": "https://www.wikidata.org/wiki/Q8366", "display_name": "Algorithm", "level": 1, "score": 0.0863552987575531}, {"id": "https://openalex.org/C115903868", "wikidata": "https://www.wikidata.org/wiki/Q80993", "display_name": "Software engineering", "level": 1, "score": 0.0806734561920166}, {"id": "https://openalex.org/C94625758", "wikidata": "https://www.wikidata.org/wiki/Q7163", "display_name": "Politics", "level": 2, "score": 0.0}, {"id": "https://openalex.org/C202444582", "wikidata": "https://www.wikidata.org/wiki/Q837863", "display_name": "Pure mathematics", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C17744445", "wikidata": "https://www.wikidata.org/wiki/Q36442", "display_name": "Political science", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C199539241", "wikidata": "https://www.wikidata.org/wiki/Q7748", "display_name": "Law", "level": 1, "score": 0.0}], "mesh": [{"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000069550", "descriptor_name": "Machine Learning", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D000465", "descriptor_name": "Algorithms", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": true}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D006801", "descriptor_name": "Humans", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012054", "descriptor_name": "Reinforcement, Psychology", "qualifier_ui": null, "qualifier_name": null, "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000379", "qualifier_name": "methods", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}, {"descriptor_ui": "D012371", "descriptor_name": "Robotics", "qualifier_ui": "Q000639", "qualifier_name": "trends", "is_major_topic": false}], "locations_count": 5, "locations": [{"id": "doi:10.1016/j.neunet.2018.07.006", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neunet.2018.07.006", "pdf_url": null, "source": {"id": "https://openalex.org/S123019304", "display_name": "Neural Networks", "issn_l": "0893-6080", "issn": ["0893-6080", "1879-2782"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural Networks", "raw_type": "journal-article"}, {"id": "pmid:30268059", "is_oa": false, "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/30268059", "pdf_url": null, "source": {"id": "https://openalex.org/S4306525036", "display_name": "PubMed", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1299303238", "host_organization_name": "National Institutes of Health", "host_organization_lineage": ["https://openalex.org/I1299303238"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neural networks : the official journal of the International Neural Network Society", "raw_type": null}, {"id": "pmh:oai:arXiv.org:1802.04181", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1802.04181", "pdf_url": "https://arxiv.org/pdf/1802.04181", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, {"id": "pmh:oai:HAL:hal-01858558v1", "is_oa": true, "landing_page_url": "https://hal.science/hal-01858558", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402512", "display_name": "HAL (Le Centre pour la Communication Scientifique Directe)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I1294671590", "host_organization_name": "Centre National de la Recherche Scientifique", "host_organization_lineage": ["https://openalex.org/I1294671590"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Neural Networks, 2018, 108, pp.379-392. &#x27E8;10.1016/j.neunet.2018.07.006&#x27E9;", "raw_type": "Journal articles"}, {"id": "pmh:oai:digibug.ugr.es:10481/88347", "is_oa": true, "landing_page_url": "https://hdl.handle.net/10481/88347", "pdf_url": null, "source": {"id": "https://openalex.org/S4306400567", "display_name": "Institutional Repository of the University of Granada (University of Granada)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I173304897", "host_organization_name": "Universidad de Granada", "host_organization_lineage": ["https://openalex.org/I173304897"], "host_organization_lineage_names": [], "type": "repository"}, "license": "cc-by-nc-nd", "license_id": "https://openalex.org/licenses/cc-by-nc-nd", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "", "raw_type": "info:eu-repo/semantics/article"}], "best_oa_location": {"id": "pmh:oai:arXiv.org:1802.04181", "is_oa": true, "landing_page_url": "http://arxiv.org/abs/1802.04181", "pdf_url": "https://arxiv.org/pdf/1802.04181", "source": {"id": "https://openalex.org/S4306400194", "display_name": "arXiv (Cornell University)", "issn_l": null, "issn": null, "is_oa": true, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I205783295", "host_organization_name": "Cornell University", "host_organization_lineage": ["https://openalex.org/I205783295"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "text"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.5099999904632568, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 139, "referenced_works": ["https://openalex.org/W60493759", "https://openalex.org/W199564175", "https://openalex.org/W834081922", "https://openalex.org/W1164749991", "https://openalex.org/W1515851193", "https://openalex.org/W1530239281", "https://openalex.org/W1571401318", "https://openalex.org/W1580389772", "https://openalex.org/W1583837637", "https://openalex.org/W1591713425", "https://openalex.org/W1625577255", "https://openalex.org/W1898476659", "https://openalex.org/W1909320841", "https://openalex.org/W1957496711", "https://openalex.org/W1959608418", "https://openalex.org/W1990693003", "https://openalex.org/W2002563186", "https://openalex.org/W2025768430", "https://openalex.org/W2061478368", "https://openalex.org/W2085627234", "https://openalex.org/W2094024286", "https://openalex.org/W2099471712", "https://openalex.org/W2101524054", "https://openalex.org/W2115668428", "https://openalex.org/W2136922672", "https://openalex.org/W2140135625", "https://openalex.org/W2145094598", "https://openalex.org/W2145339207", "https://openalex.org/W2146444479", "https://openalex.org/W2150468603", "https://openalex.org/W2157364932", "https://openalex.org/W2159008857", "https://openalex.org/W2173248099", "https://openalex.org/W2177066871", "https://openalex.org/W2179072710", "https://openalex.org/W2210483910", "https://openalex.org/W2260158983", "https://openalex.org/W2337977475", "https://openalex.org/W2338684808", "https://openalex.org/W2396178844", "https://openalex.org/W2400532028", "https://openalex.org/W2411541852", "https://openalex.org/W2412320034", "https://openalex.org/W2426267443", "https://openalex.org/W2434741482", "https://openalex.org/W2442341664", "https://openalex.org/W2473208550", "https://openalex.org/W2493343568", "https://openalex.org/W2534725218", "https://openalex.org/W2548275288", "https://openalex.org/W2559655401", "https://openalex.org/W2563830277", "https://openalex.org/W2567455162", "https://openalex.org/W2570734388", "https://openalex.org/W2609990452", "https://openalex.org/W2613677041", "https://openalex.org/W2614839826", "https://openalex.org/W2619484182", "https://openalex.org/W2735796404", "https://openalex.org/W2739083961", "https://openalex.org/W2751258126", "https://openalex.org/W2753738274", "https://openalex.org/W2754517384", "https://openalex.org/W2754695199", "https://openalex.org/W2758237641", "https://openalex.org/W2767624189", "https://openalex.org/W2775192669", "https://openalex.org/W2779977383", "https://openalex.org/W2781585732", "https://openalex.org/W2786917922", "https://openalex.org/W2799245749", "https://openalex.org/W2903420114", "https://openalex.org/W2949296282", "https://openalex.org/W2951004968", "https://openalex.org/W2951751411", "https://openalex.org/W2962808998", "https://openalex.org/W2962899390", "https://openalex.org/W2963009616", "https://openalex.org/W2963226019", "https://openalex.org/W2963305465", "https://openalex.org/W2963375351", "https://openalex.org/W2963430173", "https://openalex.org/W2963523627", "https://openalex.org/W2963634205", "https://openalex.org/W2963826370", "https://openalex.org/W2963864421", "https://openalex.org/W2963979925", "https://openalex.org/W2964112890", "https://openalex.org/W2964282857", "https://openalex.org/W2964329252", "https://openalex.org/W2990138404", "https://openalex.org/W2997574889", "https://openalex.org/W3037207827", "https://openalex.org/W3037211759", "https://openalex.org/W3093010610", "https://openalex.org/W3103780890", "https://openalex.org/W3105287169", "https://openalex.org/W3141952980", "https://openalex.org/W4214717370", "https://openalex.org/W4294226150", "https://openalex.org/W4294568686", "https://openalex.org/W4297744728", "https://openalex.org/W4297789121", "https://openalex.org/W4297809330", "https://openalex.org/W4299408792", "https://openalex.org/W4319988532", "https://openalex.org/W4320013936", "https://openalex.org/W6608171802", "https://openalex.org/W6623316541", "https://openalex.org/W6631821572", "https://openalex.org/W6635351661", "https://openalex.org/W6638286568", "https://openalex.org/W6640963894", "https://openalex.org/W6650972816", "https://openalex.org/W6657028776", "https://openalex.org/W6672056806", "https://openalex.org/W6680657880", "https://openalex.org/W6681096077", "https://openalex.org/W6683354640", "https://openalex.org/W6683390034", "https://openalex.org/W6685152286", "https://openalex.org/W6685755111", "https://openalex.org/W6688347243", "https://openalex.org/W6692963853", "https://openalex.org/W6701573534", "https://openalex.org/W6715501732", "https://openalex.org/W6718140377", "https://openalex.org/W6718991148", "https://openalex.org/W6731862918", "https://openalex.org/W6738087714", "https://openalex.org/W6740836278", "https://openalex.org/W6743541709", "https://openalex.org/W6744123322", "https://openalex.org/W6747082033", "https://openalex.org/W6747387971", "https://openalex.org/W6750253780", "https://openalex.org/W6750453880", "https://openalex.org/W6780225908", "https://openalex.org/W6780559895"], "related_works": ["https://openalex.org/W2952512863", "https://openalex.org/W3097774776", "https://openalex.org/W2187269125", "https://openalex.org/W3134504629", "https://openalex.org/W2938696877", "https://openalex.org/W4323911413", "https://openalex.org/W1982536061", "https://openalex.org/W4210631502", "https://openalex.org/W4286796787", "https://openalex.org/W2952582877"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 6}, {"year": 2025, "cited_by_count": 23}, {"year": 2024, "cited_by_count": 46}, {"year": 2023, "cited_by_count": 33}, {"year": 2022, "cited_by_count": 37}, {"year": 2021, "cited_by_count": 50}, {"year": 2020, "cited_by_count": 52}, {"year": 2019, "cited_by_count": 34}, {"year": 2018, "cited_by_count": 13}], "updated_date": "2026-05-05T08:41:31.759640", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4388208929", "doi": "https://doi.org/10.1016/j.neucom.2023.126986", "title": "Personalized robotic control via constrained multi-objective reinforcement learning", "display_name": "Personalized robotic control via constrained multi-objective reinforcement learning", "relevance_score": 189.05775, "publication_year": 2023, "publication_date": "2023-11-02", "ids": {"openalex": "https://openalex.org/W4388208929", "doi": "https://doi.org/10.1016/j.neucom.2023.126986"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.neucom.2023.126986", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neucom.2023.126986", "pdf_url": null, "source": {"id": "https://openalex.org/S45693802", "display_name": "Neurocomputing", "issn_l": "0925-2312", "issn": ["0925-2312", "1872-8286"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neurocomputing", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5061259393", "display_name": "Xiangkun He", "orcid": "https://orcid.org/0000-0001-9818-0879"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": false, "raw_author_name": "Xiangkun He", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore"], "raw_orcid": "https://orcid.org/0000-0001-9818-0879", "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5038147203", "display_name": "Zhongxu Hu", "orcid": "https://orcid.org/0000-0001-8236-7903"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": false, "raw_author_name": "Zhongxu Hu", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5024672407", "display_name": "Haohan Yang", "orcid": "https://orcid.org/0000-0002-1545-2793"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": false, "raw_author_name": "Haohan Yang", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore"], "raw_orcid": "https://orcid.org/0000-0002-1545-2793", "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5072073374", "display_name": "Chen Lv", "orcid": "https://orcid.org/0000-0001-6897-4512"}, "institutions": [{"id": "https://openalex.org/I172675005", "display_name": "Nanyang Technological University", "ror": "https://ror.org/02e7b5302", "country_code": "SG", "type": "education", "lineage": ["https://openalex.org/I172675005"]}], "countries": ["SG"], "is_corresponding": true, "raw_author_name": "Chen Lv", "raw_affiliation_strings": ["School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore"], "raw_orcid": "https://orcid.org/0000-0001-6897-4512", "affiliations": [{"raw_affiliation_string": "School of Mechanical and Aerospace Engineering and Continental-NTU Corporate Lab, Nanyang Technological University, Singapore", "institution_ids": ["https://openalex.org/I172675005"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 4, "corresponding_author_ids": ["https://openalex.org/A5072073374"], "corresponding_institution_ids": ["https://openalex.org/I172675005"], "apc_list": {"value": 2470, "currency": "USD", "value_usd": 2470}, "apc_paid": null, "fwci": 2.8969, "has_fulltext": false, "cited_by_count": 17, "citation_normalized_percentile": {"value": 0.92747567, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 99}, "biblio": {"volume": "565", "issue": null, "first_page": "126986", "last_page": "126986"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9972000122070312, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9972000122070312, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10791", "display_name": "Advanced Control Systems Optimization", "score": 0.9886000156402588, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10848", "display_name": "Advanced Multi-Objective Optimization Algorithms", "score": 0.9857000112533569, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.81435227394104}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7493711709976196}, {"id": "https://openalex.org/keywords/markov-decision-process", "display_name": "Markov decision process", "score": 0.6510542035102844}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.47627753019332886}, {"id": "https://openalex.org/keywords/state-space", "display_name": "State space", "score": 0.4742000699043274}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.47277453541755676}, {"id": "https://openalex.org/keywords/optimal-control", "display_name": "Optimal control", "score": 0.4229719638824463}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.4176325798034668}, {"id": "https://openalex.org/keywords/convergence", "display_name": "Convergence (economics)", "score": 0.41013482213020325}, {"id": "https://openalex.org/keywords/markov-process", "display_name": "Markov process", "score": 0.29192304611206055}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.12359818816184998}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.81435227394104}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7493711709976196}, {"id": "https://openalex.org/C106189395", "wikidata": "https://www.wikidata.org/wiki/Q176789", "display_name": "Markov decision process", "level": 3, "score": 0.6510542035102844}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.47627753019332886}, {"id": "https://openalex.org/C72434380", "wikidata": "https://www.wikidata.org/wiki/Q230930", "display_name": "State space", "level": 2, "score": 0.4742000699043274}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.47277453541755676}, {"id": "https://openalex.org/C91575142", "wikidata": "https://www.wikidata.org/wiki/Q1971426", "display_name": "Optimal control", "level": 2, "score": 0.4229719638824463}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.4176325798034668}, {"id": "https://openalex.org/C2777303404", "wikidata": "https://www.wikidata.org/wiki/Q759757", "display_name": "Convergence (economics)", "level": 2, "score": 0.41013482213020325}, {"id": "https://openalex.org/C159886148", "wikidata": "https://www.wikidata.org/wiki/Q176645", "display_name": "Markov process", "level": 2, "score": 0.29192304611206055}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.12359818816184998}, {"id": "https://openalex.org/C50522688", "wikidata": "https://www.wikidata.org/wiki/Q189833", "display_name": "Economic growth", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C162324750", "wikidata": "https://www.wikidata.org/wiki/Q8134", "display_name": "Economics", "level": 0, "score": 0.0}, {"id": "https://openalex.org/C105795698", "wikidata": "https://www.wikidata.org/wiki/Q12483", "display_name": "Statistics", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 2, "locations": [{"id": "doi:10.1016/j.neucom.2023.126986", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.neucom.2023.126986", "pdf_url": null, "source": {"id": "https://openalex.org/S45693802", "display_name": "Neurocomputing", "issn_l": "0925-2312", "issn": ["0925-2312", "1872-8286"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Neurocomputing", "raw_type": "journal-article"}, {"id": "pmh:oai:dr.ntu.edu.sg:10356/173290", "is_oa": false, "landing_page_url": "https://hdl.handle.net/10356/173290", "pdf_url": null, "source": {"id": "https://openalex.org/S4306402609", "display_name": "DR-NTU (Nanyang Technological University)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I172675005", "host_organization_name": "Nanyang Technological University", "host_organization_lineage": ["https://openalex.org/I172675005"], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": "Journal Article"}], "best_oa_location": null, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.75, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [{"id": "https://openalex.org/F4320320696", "display_name": "Agency for Science, Technology and Research", "ror": "https://ror.org/036wvzt09"}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 63, "referenced_works": ["https://openalex.org/W1566405224", "https://openalex.org/W1987725948", "https://openalex.org/W1998649829", "https://openalex.org/W2012612381", "https://openalex.org/W2102660061", "https://openalex.org/W2106334424", "https://openalex.org/W2129997629", "https://openalex.org/W2565516711", "https://openalex.org/W2625286567", "https://openalex.org/W2800773442", "https://openalex.org/W2899474210", "https://openalex.org/W2911087563", "https://openalex.org/W2950912723", "https://openalex.org/W2969489701", "https://openalex.org/W2981915592", "https://openalex.org/W2990138404", "https://openalex.org/W2999905431", "https://openalex.org/W3035200689", "https://openalex.org/W3045059767", "https://openalex.org/W3088811935", "https://openalex.org/W3105444733", "https://openalex.org/W3172321357", "https://openalex.org/W3212993431", "https://openalex.org/W4214646100", "https://openalex.org/W4225773298", "https://openalex.org/W4296335812", "https://openalex.org/W4310082948", "https://openalex.org/W4312876648", "https://openalex.org/W4323979480", "https://openalex.org/W4324135644", "https://openalex.org/W4365145202", "https://openalex.org/W4365451933", "https://openalex.org/W4365800070", "https://openalex.org/W4365806381", "https://openalex.org/W4382051552", "https://openalex.org/W4382119270", "https://openalex.org/W4385196529", "https://openalex.org/W4386471863", "https://openalex.org/W4386824909", "https://openalex.org/W6601313673", "https://openalex.org/W6603177225", "https://openalex.org/W6647175844", "https://openalex.org/W6661004726", "https://openalex.org/W6674628981", "https://openalex.org/W6677504650", "https://openalex.org/W6683153233", "https://openalex.org/W6737893269", "https://openalex.org/W6754554871", "https://openalex.org/W6755928903", "https://openalex.org/W6758020833", "https://openalex.org/W6764346552", "https://openalex.org/W6766952794", "https://openalex.org/W6767486501", "https://openalex.org/W6772562993", "https://openalex.org/W6777159553", "https://openalex.org/W6779345118", "https://openalex.org/W6779795787", "https://openalex.org/W6797282509", "https://openalex.org/W6850534773", "https://openalex.org/W6851579164", "https://openalex.org/W6852176339", "https://openalex.org/W6853396997", "https://openalex.org/W6855027571"], "related_works": ["https://openalex.org/W3096874164", "https://openalex.org/W2937181779", "https://openalex.org/W2386410636", "https://openalex.org/W1985560493", "https://openalex.org/W2357975469", "https://openalex.org/W2145363145", "https://openalex.org/W1626977535", "https://openalex.org/W4284974072", "https://openalex.org/W2341346307", "https://openalex.org/W4225269853"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 1}, {"year": 2025, "cited_by_count": 11}, {"year": 2024, "cited_by_count": 5}], "updated_date": "2025-11-06T03:46:38.306776", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W2109169869", "doi": "https://doi.org/10.1145/1273496.1273590", "title": "Reinforcement learning by reward-weighted regression for operational space control", "display_name": "Reinforcement learning by reward-weighted regression for operational space control", "relevance_score": 182.41948, "publication_year": 2007, "publication_date": "2007-06-20", "ids": {"openalex": "https://openalex.org/W2109169869", "doi": "https://doi.org/10.1145/1273496.1273590", "mag": "2109169869"}, "language": "en", "primary_location": {"id": "doi:10.1145/1273496.1273590", "is_oa": false, "landing_page_url": "https://doi.org/10.1145/1273496.1273590", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the 24th international conference on Machine learning", "raw_type": "proceedings-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": true, "oa_status": "green", "oa_url": "http://hdl.handle.net/11858/00-001M-0000-0013-CD69-F", "any_repository_has_fulltext": true}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5071367253", "display_name": "Jan Peters", "orcid": "https://orcid.org/0000-0002-5266-8091"}, "institutions": [{"id": "https://openalex.org/I4210112925", "display_name": "Max Planck Institute for Biological Cybernetics", "ror": "https://ror.org/026nmvv73", "country_code": "DE", "type": "facility", "lineage": ["https://openalex.org/I149899117", "https://openalex.org/I4210112925"]}, {"id": "https://openalex.org/I149899117", "display_name": "Max Planck Society", "ror": "https://ror.org/01hhn8329", "country_code": "DE", "type": "funder", "lineage": ["https://openalex.org/I149899117"]}], "countries": ["DE"], "is_corresponding": true, "raw_author_name": "Jan Peters", "raw_affiliation_strings": ["Max-Planck Institute for Biological Cybernetics, Tuebingen, Germany", "Max Planck Institute for Biological Cybernetics, Tuebingen, Germany#TAB#"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Max-Planck Institute for Biological Cybernetics, Tuebingen, Germany", "institution_ids": ["https://openalex.org/I4210112925"]}, {"raw_affiliation_string": "Max Planck Institute for Biological Cybernetics, Tuebingen, Germany#TAB#", "institution_ids": ["https://openalex.org/I149899117"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5029642293", "display_name": "Stefan Schaal", "orcid": "https://orcid.org/0000-0001-5660-1874"}, "institutions": [{"id": "https://openalex.org/I1174212", "display_name": "University of Southern California", "ror": "https://ror.org/03taz7m60", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I1174212"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Stefan Schaal", "raw_affiliation_strings": ["University of Southern California, Los Angeles, CA", "University of Southern California,,,Los Angeles,CA,"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "University of Southern California, Los Angeles, CA", "institution_ids": ["https://openalex.org/I1174212"]}, {"raw_affiliation_string": "University of Southern California,,,Los Angeles,CA,", "institution_ids": ["https://openalex.org/I1174212"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 2, "corresponding_author_ids": ["https://openalex.org/A5071367253"], "corresponding_institution_ids": ["https://openalex.org/I149899117", "https://openalex.org/I4210112925"], "apc_list": null, "apc_paid": null, "fwci": 13.1797, "has_fulltext": true, "cited_by_count": 184, "citation_normalized_percentile": {"value": 0.98648706, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 100}, "biblio": {"volume": null, "issue": null, "first_page": "745", "last_page": "750"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9975000023841858, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10653", "display_name": "Robot Manipulation and Learning", "score": 0.9975000023841858, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10876", "display_name": "Fault Detection and Control Systems", "score": 0.9940999746322632, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11701", "display_name": "Space Satellite Systems and Control", "score": 0.984499990940094, "subfield": {"id": "https://openalex.org/subfields/2202", "display_name": "Aerospace Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8225122690200806}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.5859369039535522}, {"id": "https://openalex.org/keywords/reinforcement", "display_name": "Reinforcement", "score": 0.5646054148674011}, {"id": "https://openalex.org/keywords/regression", "display_name": "Regression", "score": 0.5499276518821716}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.5398691296577454}, {"id": "https://openalex.org/keywords/regression-analysis", "display_name": "Regression analysis", "score": 0.5326943397521973}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.5225731134414673}, {"id": "https://openalex.org/keywords/space", "display_name": "Space (punctuation)", "score": 0.5042723417282104}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.41009098291397095}, {"id": "https://openalex.org/keywords/psychology", "display_name": "Psychology", "score": 0.2906681299209595}, {"id": "https://openalex.org/keywords/statistics", "display_name": "Statistics", "score": 0.2629961371421814}, {"id": "https://openalex.org/keywords/mathematics", "display_name": "Mathematics", "score": 0.1873648762702942}, {"id": "https://openalex.org/keywords/social-psychology", "display_name": "Social psychology", "score": 0.15204155445098877}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8225122690200806}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.5859369039535522}, {"id": "https://openalex.org/C67203356", "wikidata": "https://www.wikidata.org/wiki/Q1321905", "display_name": "Reinforcement", "level": 2, "score": 0.5646054148674011}, {"id": "https://openalex.org/C83546350", "wikidata": "https://www.wikidata.org/wiki/Q1139051", "display_name": "Regression", "level": 2, "score": 0.5499276518821716}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.5398691296577454}, {"id": "https://openalex.org/C152877465", "wikidata": "https://www.wikidata.org/wiki/Q208042", "display_name": "Regression analysis", "level": 2, "score": 0.5326943397521973}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.5225731134414673}, {"id": "https://openalex.org/C2778572836", "wikidata": "https://www.wikidata.org/wiki/Q380933", "display_name": "Space (punctuation)", "level": 2, "score": 0.5042723417282104}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.41009098291397095}, {"id": "https://openalex.org/C15744967", "wikidata": "https://www.wikidata.org/wiki/Q9418", "display_name": "Psychology", "level": 0, "score": 0.2906681299209595}, {"id": "https://openalex.org/C105795698", "wikidata": "https://www.wikidata.org/wiki/Q12483", "display_name": "Statistics", "level": 1, "score": 0.2629961371421814}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.1873648762702942}, {"id": "https://openalex.org/C77805123", "wikidata": "https://www.wikidata.org/wiki/Q161272", "display_name": "Social psychology", "level": 1, "score": 0.15204155445098877}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}], "mesh": [], "locations_count": 4, "locations": [{"id": "doi:10.1145/1273496.1273590", "is_oa": false, "landing_page_url": "https://doi.org/10.1145/1273496.1273590", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Proceedings of the 24th international conference on Machine learning", "raw_type": "proceedings-article"}, {"id": "pmh:oai:pure.mpg.de:item_1790444", "is_oa": true, "landing_page_url": "http://hdl.handle.net/11858/00-001M-0000-0013-CD69-F", "pdf_url": "http://hdl.handle.net/11858/00-001M-0000-0013-CD69-F", "source": {"id": "https://openalex.org/S4306400654", "display_name": "MPG.PuRe (Max Planck Society)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I149899117", "host_organization_name": "Max Planck Society", "host_organization_lineage": ["https://openalex.org/I149899117"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "ICML '07: 24th International Conference on Machine Learning", "raw_type": "info:eu-repo/semantics/conferenceObject"}, {"id": "pmh:oai:CiteSeerX.psu:10.1.1.79.6266", "is_oa": false, "landing_page_url": "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.6266", "pdf_url": null, "source": null, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "http://imls.engr.oregonstate.edu/www/htdocs/proceedings/icml2007/papers/98.pdf", "raw_type": "text"}, {"id": "pmh:oai:edoc.mpg.de:352337", "is_oa": false, "landing_page_url": "http://edoc.mpg.de/352337", "pdf_url": null, "source": {"id": "https://openalex.org/S4406922265", "display_name": "Max Planck Institute for Plasma Physics", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": null, "host_organization_name": null, "host_organization_lineage": [], "host_organization_lineage_names": [], "type": "repository"}, "license": null, "license_id": null, "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "Proceedings of the 24th Annual International Conference on Machine Learning (ICML 2007), 745-750 (2007)", "raw_type": "Conference-Paper"}], "best_oa_location": {"id": "pmh:oai:pure.mpg.de:item_1790444", "is_oa": true, "landing_page_url": "http://hdl.handle.net/11858/00-001M-0000-0013-CD69-F", "pdf_url": "http://hdl.handle.net/11858/00-001M-0000-0013-CD69-F", "source": {"id": "https://openalex.org/S4306400654", "display_name": "MPG.PuRe (Max Planck Society)", "issn_l": null, "issn": null, "is_oa": false, "is_in_doaj": false, "is_core": false, "host_organization": "https://openalex.org/I149899117", "host_organization_name": "Max Planck Society", "host_organization_lineage": ["https://openalex.org/I149899117"], "host_organization_lineage_names": [], "type": "repository"}, "license": "other-oa", "license_id": "https://openalex.org/licenses/other-oa", "version": "submittedVersion", "is_accepted": false, "is_published": false, "raw_source_name": "ICML '07: 24th International Conference on Machine Learning", "raw_type": "info:eu-repo/semantics/conferenceObject"}, "sustainable_development_goals": [{"id": "https://metadata.un.org/sdg/16", "score": 0.7200000286102295, "display_name": "Peace, Justice and strong institutions"}], "awards": [], "funders": [], "has_content": {"pdf": true, "grobid_xml": true}, "content_urls": {"pdf": "https://content.openalex.org/works/W2109169869.pdf", "grobid_xml": "https://content.openalex.org/works/W2109169869.grobid-xml"}, "referenced_works_count": 13, "referenced_works": ["https://openalex.org/W203276351", "https://openalex.org/W639693478", "https://openalex.org/W1949974402", "https://openalex.org/W1984178167", "https://openalex.org/W1988946454", "https://openalex.org/W2043968544", "https://openalex.org/W2064076655", "https://openalex.org/W2080039641", "https://openalex.org/W2097815751", "https://openalex.org/W2107726111", "https://openalex.org/W2112474089", "https://openalex.org/W2135194391", "https://openalex.org/W2150012741"], "related_works": ["https://openalex.org/W3074294383", "https://openalex.org/W4206669594", "https://openalex.org/W2961085424", "https://openalex.org/W2959276766", "https://openalex.org/W4295941380", "https://openalex.org/W260766989", "https://openalex.org/W3139193008", "https://openalex.org/W4306674287", "https://openalex.org/W2909304650", "https://openalex.org/W4319083788"], "abstract_inverted_index": {"Many": [0], "robot": [1], "control": [2, 36], "problems": [3], "of": [4, 22, 51, 73, 88, 125], "practical": [5], "importance,": [6], "including": [7], "operational": [8], "space": [9], "control,": [10], "can": [11, 30], "be": [12, 31], "reformulated": [13], "as": [14, 39], "immediate": [15, 91], "reward": [16, 102], "reinforcement": [17, 27, 76], "learning": [18, 28, 35, 77, 89], "problems.": [19], "However,": [20], "few": [21], "the": [23, 74, 86], "known": [24], "optimization": [25], "or": [26, 54], "algorithms": [29], "used": [32], "in": [33, 117, 123], "online": [34], "for": [37, 66, 104], "robots,": [38, 53], "they": [40], "are": [41, 64], "either": [42], "prohibitively": [43], "slow,": [44], "do": [45], "not": [46], "scale": [47], "to": [48, 93], "interesting": [49], "domains": [50], "complex": [52, 126], "require": [55], "trying": [56], "out": [57], "policies": [58], "generated": [59], "by": [60, 80], "random": [61], "search,": [62], "which": [63], "infeasible": [65], "a": [67, 71, 94], "physical": [68], "system.": [69], "Using": [70], "generalization": [72], "EM-base": [75], "framework": [78], "suggested": [79], "Dayan": [81], "&amp;amp;": [82], "Hinton,": [83], "we": [84], "reduce": [85], "problem": [87, 97], "with": [90, 98], "rewards": [92], "reward-weighted": [95], "regression": [96], "an": [99], "adaptive,": [100], "integrated": [101], "transformation": [103], "faster": [105], "convergence.": [106], "The": [107], "resulting": [108], "algorithm": [109], "is": [110], "efficient,": [111], "learns": [112], "smoothly": [113], "without": [114], "dangerous": [115], "jumps": [116], "solution": [118], "space,": [119], "and": [120], "works": [121], "well": [122], "applications": [124], "high": [127], "degreeof-freedom": [128], "robots.": [129], "1.": [130]}, "counts_by_year": [{"year": 2026, "cited_by_count": 3}, {"year": 2025, "cited_by_count": 17}, {"year": 2024, "cited_by_count": 6}, {"year": 2023, "cited_by_count": 4}, {"year": 2022, "cited_by_count": 7}, {"year": 2021, "cited_by_count": 20}, {"year": 2020, "cited_by_count": 20}, {"year": 2019, "cited_by_count": 19}, {"year": 2018, "cited_by_count": 15}, {"year": 2017, "cited_by_count": 16}, {"year": 2016, "cited_by_count": 9}, {"year": 2015, "cited_by_count": 3}, {"year": 2014, "cited_by_count": 7}, {"year": 2013, "cited_by_count": 6}, {"year": 2012, "cited_by_count": 8}], "updated_date": "2026-04-25T08:17:42.794288", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3093426589", "doi": "https://doi.org/10.1631/fitee.1900533", "title": "Deep reinforcement learning: a survey", "display_name": "Deep reinforcement learning: a survey", "relevance_score": 180.7246, "publication_year": 2020, "publication_date": "2020-10-15", "ids": {"openalex": "https://openalex.org/W3093426589", "doi": "https://doi.org/10.1631/fitee.1900533", "mag": "3093426589"}, "language": "en", "primary_location": {"id": "doi:10.1631/fitee.1900533", "is_oa": false, "landing_page_url": "https://doi.org/10.1631/fitee.1900533", "pdf_url": null, "source": {"id": "https://openalex.org/S4210189857", "display_name": "Frontiers of Information Technology & Electronic Engineering", "issn_l": "2095-9184", "issn": ["2095-9184", "2095-9230"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Frontiers of Information Technology &amp; Electronic Engineering", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": null, "display_name": "Hao-nan Wang", "orcid": "https://orcid.org/0000-0002-0792-3858"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Hao-nan Wang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": "https://orcid.org/0000-0002-0792-3858", "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5101683584", "display_name": "Ning Liu", "orcid": "https://orcid.org/0000-0001-7475-9739"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Ning Liu", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5040322824", "display_name": "Yiyun Zhang", "orcid": "https://orcid.org/0000-0002-1419-485X"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yi-yun Zhang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5039795290", "display_name": "Dawei Feng", "orcid": "https://orcid.org/0000-0002-7587-8905"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Da-wei Feng", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5051363049", "display_name": "Feng Huang", "orcid": "https://orcid.org/0000-0002-0740-9373"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Feng Huang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5100681030", "display_name": "Dongsheng Li", "orcid": "https://orcid.org/0000-0001-7725-8040"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Dong-sheng Li", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5100395388", "display_name": "Yiming Zhang", "orcid": "https://orcid.org/0000-0003-3857-8433"}, "institutions": [{"id": "https://openalex.org/I170215575", "display_name": "National University of Defense Technology", "ror": "https://ror.org/05d2yfz11", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I170215575"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Yi-ming Zhang", "raw_affiliation_strings": ["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, 410000, China", "institution_ids": ["https://openalex.org/I170215575"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 7, "corresponding_author_ids": [], "corresponding_institution_ids": ["https://openalex.org/I170215575"], "apc_list": null, "apc_paid": null, "fwci": 13.3273, "has_fulltext": false, "cited_by_count": 276, "citation_normalized_percentile": {"value": 0.9904292, "is_in_top_1_percent": true, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 90, "max": 100}, "biblio": {"volume": "21", "issue": "12", "first_page": "1726", "last_page": "1744"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9997000098228455, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T11975", "display_name": "Evolutionary Algorithms and Applications", "score": 0.9897000193595886, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12784", "display_name": "Modular Robots and Swarm Intelligence", "score": 0.9832000136375427, "subfield": {"id": "https://openalex.org/subfields/2210", "display_name": "Mechanical Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.8006557822227478}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.7813801765441895}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.6882914900779724}, {"id": "https://openalex.org/keywords/categorization", "display_name": "Categorization", "score": 0.5173744559288025}, {"id": "https://openalex.org/keywords/deep-learning", "display_name": "Deep learning", "score": 0.4922133982181549}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.39875051379203796}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.8006557822227478}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.7813801765441895}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.6882914900779724}, {"id": "https://openalex.org/C94124525", "wikidata": "https://www.wikidata.org/wiki/Q912550", "display_name": "Categorization", "level": 2, "score": 0.5173744559288025}, {"id": "https://openalex.org/C108583219", "wikidata": "https://www.wikidata.org/wiki/Q197536", "display_name": "Deep learning", "level": 2, "score": 0.4922133982181549}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.39875051379203796}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1631/fitee.1900533", "is_oa": false, "landing_page_url": "https://doi.org/10.1631/fitee.1900533", "pdf_url": null, "source": {"id": "https://openalex.org/S4210189857", "display_name": "Frontiers of Information Technology & Electronic Engineering", "issn_l": "2095-9184", "issn": ["2095-9184", "2095-9230"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310319900", "host_organization_name": "Springer Science+Business Media", "host_organization_lineage": ["https://openalex.org/P4310319900", "https://openalex.org/P4310319965"], "host_organization_lineage_names": ["Springer Science+Business Media", "Springer Nature"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Frontiers of Information Technology &amp; Electronic Engineering", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 154, "referenced_works": ["https://openalex.org/W41554520", "https://openalex.org/W1575592356", "https://openalex.org/W1757796397", "https://openalex.org/W1968962398", "https://openalex.org/W1988526405", "https://openalex.org/W1999874108", "https://openalex.org/W2021247827", "https://openalex.org/W2061562262", "https://openalex.org/W2098774185", "https://openalex.org/W2100401322", "https://openalex.org/W2100677568", "https://openalex.org/W2104733512", "https://openalex.org/W2106164082", "https://openalex.org/W2108738385", "https://openalex.org/W2119717200", "https://openalex.org/W2121863487", "https://openalex.org/W2124352385", "https://openalex.org/W2126909264", "https://openalex.org/W2141559645", "https://openalex.org/W2145339207", "https://openalex.org/W2150468603", "https://openalex.org/W2155968351", "https://openalex.org/W2163605009", "https://openalex.org/W2165150801", "https://openalex.org/W2169498096", "https://openalex.org/W2173564293", "https://openalex.org/W2257979135", "https://openalex.org/W2280163991", "https://openalex.org/W2290104316", "https://openalex.org/W2417786368", "https://openalex.org/W2419612459", "https://openalex.org/W2434014514", "https://openalex.org/W2466175722", "https://openalex.org/W2523013761", "https://openalex.org/W2528489519", "https://openalex.org/W2529477964", "https://openalex.org/W2529658650", "https://openalex.org/W2546571074", "https://openalex.org/W2550182557", "https://openalex.org/W2556958149", "https://openalex.org/W2561776174", "https://openalex.org/W2575705757", "https://openalex.org/W2593044849", "https://openalex.org/W2596982695", "https://openalex.org/W2604373826", "https://openalex.org/W2604763608", "https://openalex.org/W2614839826", "https://openalex.org/W2616964725", "https://openalex.org/W2726187156", "https://openalex.org/W2727576081", "https://openalex.org/W2736601468", "https://openalex.org/W2738669288", "https://openalex.org/W2739473244", "https://openalex.org/W2746553466", "https://openalex.org/W2749604329", "https://openalex.org/W2749807327", "https://openalex.org/W2755546070", "https://openalex.org/W2761873684", "https://openalex.org/W2765349170", "https://openalex.org/W2766610320", "https://openalex.org/W2766812927", "https://openalex.org/W2767050701", "https://openalex.org/W2781726626", "https://openalex.org/W2785397462", "https://openalex.org/W2786928559", "https://openalex.org/W2787501667", "https://openalex.org/W2787938642", "https://openalex.org/W2788357188", "https://openalex.org/W2788904251", "https://openalex.org/W2789824229", "https://openalex.org/W2795109282", "https://openalex.org/W2796290181", "https://openalex.org/W2805560727", "https://openalex.org/W2805762288", "https://openalex.org/W2810754397", "https://openalex.org/W2810785043", "https://openalex.org/W2824027552", "https://openalex.org/W2885550588", "https://openalex.org/W2888541716", "https://openalex.org/W2889347284", "https://openalex.org/W2892620417", "https://openalex.org/W2895531857", "https://openalex.org/W2903630557", "https://openalex.org/W2912757816", "https://openalex.org/W2914752403", "https://openalex.org/W2923504512", "https://openalex.org/W2938321354", "https://openalex.org/W2949561945", "https://openalex.org/W2949608212", "https://openalex.org/W2950359962", "https://openalex.org/W2950471160", "https://openalex.org/W2950492145", "https://openalex.org/W2950794298", "https://openalex.org/W2951266961", "https://openalex.org/W2951775809", "https://openalex.org/W2951948137", "https://openalex.org/W2952787800", "https://openalex.org/W2954058884", "https://openalex.org/W2962715211", "https://openalex.org/W2962858248", "https://openalex.org/W2962872206", "https://openalex.org/W2962938178", "https://openalex.org/W2963068985", "https://openalex.org/W2963184621", "https://openalex.org/W2963190967", "https://openalex.org/W2963280855", "https://openalex.org/W2963313316", "https://openalex.org/W2963423916", "https://openalex.org/W2963430173", "https://openalex.org/W2963477884", "https://openalex.org/W2963523627", "https://openalex.org/W2963614114", "https://openalex.org/W2963634205", "https://openalex.org/W2963703448", "https://openalex.org/W2963864421", "https://openalex.org/W2963993537", "https://openalex.org/W2964043796", "https://openalex.org/W2964161785", "https://openalex.org/W2964174623", "https://openalex.org/W2964262254", "https://openalex.org/W2964309167", "https://openalex.org/W2968986602", "https://openalex.org/W2971218263", "https://openalex.org/W3037590790", "https://openalex.org/W3101442004", "https://openalex.org/W3103559770", "https://openalex.org/W3103780890", "https://openalex.org/W3104515094", "https://openalex.org/W4241811150", "https://openalex.org/W6600002382", "https://openalex.org/W6600137863", "https://openalex.org/W6600168703", "https://openalex.org/W6600446476", "https://openalex.org/W6600553734", "https://openalex.org/W6601211009", "https://openalex.org/W6601870611", "https://openalex.org/W6602613565", "https://openalex.org/W6602704705", "https://openalex.org/W6603727575", "https://openalex.org/W6603732165", "https://openalex.org/W6605730092", "https://openalex.org/W6606825362", "https://openalex.org/W6606882031", "https://openalex.org/W6608490358", "https://openalex.org/W6633651462", "https://openalex.org/W6675999342", "https://openalex.org/W6676077707", "https://openalex.org/W6718092244", "https://openalex.org/W6743806954", "https://openalex.org/W6746203390", "https://openalex.org/W6772334619", "https://openalex.org/W6811851346", "https://openalex.org/W6814003322", "https://openalex.org/W6846889679"], "related_works": ["https://openalex.org/W2731899572", "https://openalex.org/W2961085424", "https://openalex.org/W3215138031", "https://openalex.org/W4306674287", "https://openalex.org/W3009238340", "https://openalex.org/W2939353110", "https://openalex.org/W4321369474", "https://openalex.org/W4360585206", "https://openalex.org/W4285208911", "https://openalex.org/W3046775127"], "abstract_inverted_index": {"Deep": [0], "reinforcement": [1], "learning": [2], "(RL)": [3], "has": [4, 17], "become": [5], "one": [6], "of": [7], "the": [8, 43, 74, 86], "most": [9], "popular": [10], "topics": [11], "in": [12, 21], "artificial": [13], "intelligence": [14], "research.": [15, 97], "It": [16], "been": [18], "widely": [19], "used": [20], "various": [22], "fields,": [23], "such": [24], "as": [25], "end-to-end": [26], "control,": [27, 29], "robotic": [28], "recommendation": [30], "systems,": [31], "and": [32, 47, 49, 67, 80, 90], "natural": [33], "language": [34], "dialogue": [35], "systems.": [36], "In": [37], "this": [38], "survey,": [39], "we": [40, 84], "systematically": [41], "categorize": [42], "deep": [44, 56], "RL": [45, 57, 69], "algorithms": [46, 58], "applications,": [48, 89], "provide": [50], "a": [51], "detailed": [52], "review": [53], "over": [54], "existing": [55], "by": [59], "dividing": [60], "them": [61], "into": [62], "modelbased": [63], "methods,": [64, 66], "model-free": [65], "advanced": [68], "methods.": [70], "We": [71], "thoroughly": [72], "analyze": [73, 91], "advances": [75], "including": [76], "exploration,": [77], "inverse": [78], "RL,": [79], "transfer": [81], "RL.": [82], "Finally,": [83], "outline": [85], "current": [87], "representative": [88], "four": [92], "open": [93], "problems": [94], "for": [95], "future": [96]}, "counts_by_year": [{"year": 2026, "cited_by_count": 10}, {"year": 2025, "cited_by_count": 61}, {"year": 2024, "cited_by_count": 106}, {"year": 2023, "cited_by_count": 47}, {"year": 2022, "cited_by_count": 35}, {"year": 2021, "cited_by_count": 16}, {"year": 2019, "cited_by_count": 1}], "updated_date": "2026-05-06T08:25:59.206177", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W4386950658", "doi": "https://doi.org/10.1016/j.engappai.2023.107130", "title": "Hierarchical reinforcement learning with adaptive scheduling for robot control", "display_name": "Hierarchical reinforcement learning with adaptive scheduling for robot control", "relevance_score": 179.93169, "publication_year": 2023, "publication_date": "2023-09-22", "ids": {"openalex": "https://openalex.org/W4386950658", "doi": "https://doi.org/10.1016/j.engappai.2023.107130"}, "language": "en", "primary_location": {"id": "doi:10.1016/j.engappai.2023.107130", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.engappai.2023.107130", "pdf_url": null, "source": {"id": "https://openalex.org/S900972176", "display_name": "Engineering Applications of Artificial Intelligence", "issn_l": "0952-1976", "issn": ["0952-1976", "1873-6769"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Engineering Applications of Artificial Intelligence", "raw_type": "journal-article"}, "type": "article", "indexed_in": ["crossref"], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5086179182", "display_name": "Zhigang Huang", "orcid": "https://orcid.org/0000-0003-1959-2192"}, "institutions": [{"id": "https://openalex.org/I3923682", "display_name": "Soochow University", "ror": "https://ror.org/05t8y2r12", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I3923682"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Zhigang Huang", "raw_affiliation_strings": ["School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China", "institution_ids": ["https://openalex.org/I3923682"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5100414574", "display_name": "Quan Liu", "orcid": "https://orcid.org/0000-0002-8710-1810"}, "institutions": [{"id": "https://openalex.org/I3923682", "display_name": "Soochow University", "ror": "https://ror.org/05t8y2r12", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I3923682"]}], "countries": ["CN"], "is_corresponding": true, "raw_author_name": "Quan Liu", "raw_affiliation_strings": ["School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China"], "raw_orcid": "https://orcid.org/0000-0002-8710-1810", "affiliations": [{"raw_affiliation_string": "School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China", "institution_ids": ["https://openalex.org/I3923682"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5074124642", "display_name": "Fei Zhu", "orcid": "https://orcid.org/0000-0002-2226-2859"}, "institutions": [{"id": "https://openalex.org/I3923682", "display_name": "Soochow University", "ror": "https://ror.org/05t8y2r12", "country_code": "CN", "type": "education", "lineage": ["https://openalex.org/I3923682"]}], "countries": ["CN"], "is_corresponding": false, "raw_author_name": "Fei Zhu", "raw_affiliation_strings": ["School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "School of Computer Science and Technology, Soochow University, 215006, Suzhou, Jiangsu, PR China", "institution_ids": ["https://openalex.org/I3923682"]}]}], "institutions": [], "countries_distinct_count": 1, "institutions_distinct_count": 3, "corresponding_author_ids": ["https://openalex.org/A5100414574"], "corresponding_institution_ids": ["https://openalex.org/I3923682"], "apc_list": {"value": 3170, "currency": "USD", "value_usd": 3170}, "apc_paid": null, "fwci": 2.8969, "has_fulltext": false, "cited_by_count": 17, "citation_normalized_percentile": {"value": 0.92696335, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 96, "max": 99}, "biblio": {"volume": "126", "issue": null, "first_page": "107130", "last_page": "107130"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9988999962806702, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.9988999962806702, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T12101", "display_name": "Advanced Bandit Algorithms Research", "score": 0.9835000038146973, "subfield": {"id": "https://openalex.org/subfields/1803", "display_name": "Management Science and Operations Research"}, "field": {"id": "https://openalex.org/fields/18", "display_name": "Decision Sciences"}, "domain": {"id": "https://openalex.org/domains/2", "display_name": "Social Sciences"}}, {"id": "https://openalex.org/T12288", "display_name": "Optimization and Search Problems", "score": 0.9726999998092651, "subfield": {"id": "https://openalex.org/subfields/1705", "display_name": "Computer Networks and Communications"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.860951840877533}, {"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7520056366920471}, {"id": "https://openalex.org/keywords/scheduling", "display_name": "Scheduling (production processes)", "score": 0.5563971996307373}, {"id": "https://openalex.org/keywords/dynamic-priority-scheduling", "display_name": "Dynamic priority scheduling", "score": 0.47208163142204285}, {"id": "https://openalex.org/keywords/distributed-computing", "display_name": "Distributed computing", "score": 0.38748615980148315}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.3841503858566284}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.32550400495529175}, {"id": "https://openalex.org/keywords/mathematical-optimization", "display_name": "Mathematical optimization", "score": 0.3074191212654114}, {"id": "https://openalex.org/keywords/schedule", "display_name": "Schedule", "score": 0.15284273028373718}], "concepts": [{"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.860951840877533}, {"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7520056366920471}, {"id": "https://openalex.org/C206729178", "wikidata": "https://www.wikidata.org/wiki/Q2271896", "display_name": "Scheduling (production processes)", "level": 2, "score": 0.5563971996307373}, {"id": "https://openalex.org/C107568181", "wikidata": "https://www.wikidata.org/wiki/Q5319000", "display_name": "Dynamic priority scheduling", "level": 3, "score": 0.47208163142204285}, {"id": "https://openalex.org/C120314980", "wikidata": "https://www.wikidata.org/wiki/Q180634", "display_name": "Distributed computing", "level": 1, "score": 0.38748615980148315}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.3841503858566284}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.32550400495529175}, {"id": "https://openalex.org/C126255220", "wikidata": "https://www.wikidata.org/wiki/Q141495", "display_name": "Mathematical optimization", "level": 1, "score": 0.3074191212654114}, {"id": "https://openalex.org/C68387754", "wikidata": "https://www.wikidata.org/wiki/Q7271585", "display_name": "Schedule", "level": 2, "score": 0.15284273028373718}, {"id": "https://openalex.org/C111919701", "wikidata": "https://www.wikidata.org/wiki/Q9135", "display_name": "Operating system", "level": 1, "score": 0.0}, {"id": "https://openalex.org/C33923547", "wikidata": "https://www.wikidata.org/wiki/Q395", "display_name": "Mathematics", "level": 0, "score": 0.0}], "mesh": [], "locations_count": 1, "locations": [{"id": "doi:10.1016/j.engappai.2023.107130", "is_oa": false, "landing_page_url": "https://doi.org/10.1016/j.engappai.2023.107130", "pdf_url": null, "source": {"id": "https://openalex.org/S900972176", "display_name": "Engineering Applications of Artificial Intelligence", "issn_l": "0952-1976", "issn": ["0952-1976", "1873-6769"], "is_oa": false, "is_in_doaj": false, "is_core": true, "host_organization": "https://openalex.org/P4310320990", "host_organization_name": "Elsevier BV", "host_organization_lineage": ["https://openalex.org/P4310320990"], "host_organization_lineage_names": ["Elsevier BV"], "type": "journal"}, "license": null, "license_id": null, "version": "publishedVersion", "is_accepted": true, "is_published": true, "raw_source_name": "Engineering Applications of Artificial Intelligence", "raw_type": "journal-article"}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [{"id": "https://openalex.org/G1055180593", "display_name": null, "funder_award_id": "61772355", "funder_id": "https://openalex.org/F4320321001", "funder_display_name": "National Natural Science Foundation of China"}], "funders": [{"id": "https://openalex.org/F4320321001", "display_name": "National Natural Science Foundation of China", "ror": "https://ror.org/01h0zpd94"}, {"id": "https://openalex.org/F4320327518", "display_name": "Priority Academic Program Development of Jiangsu Higher Education Institutions", "ror": null}], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 74, "referenced_works": ["https://openalex.org/W1786044565", "https://openalex.org/W2057447866", "https://openalex.org/W2109910161", "https://openalex.org/W2525579820", "https://openalex.org/W2556477470", "https://openalex.org/W2606433045", "https://openalex.org/W2612690371", "https://openalex.org/W2788741142", "https://openalex.org/W2794940174", "https://openalex.org/W2884439071", "https://openalex.org/W2899077443", "https://openalex.org/W2907385442", "https://openalex.org/W2922007426", "https://openalex.org/W2949126959", "https://openalex.org/W2963523627", "https://openalex.org/W2996695841", "https://openalex.org/W3037620198", "https://openalex.org/W3121879877", "https://openalex.org/W3142041998", "https://openalex.org/W3170298552", "https://openalex.org/W3175662487", "https://openalex.org/W4210700398", "https://openalex.org/W4212774754", "https://openalex.org/W4225304231", "https://openalex.org/W4229447382", "https://openalex.org/W4242073862", "https://openalex.org/W4287162146", "https://openalex.org/W4317382688", "https://openalex.org/W4319215637", "https://openalex.org/W4319966548", "https://openalex.org/W4320561357", "https://openalex.org/W6635351661", "https://openalex.org/W6677067356", "https://openalex.org/W6717230150", "https://openalex.org/W6727349600", "https://openalex.org/W6730641667", "https://openalex.org/W6734325300", "https://openalex.org/W6740801417", "https://openalex.org/W6744597842", "https://openalex.org/W6744935223", "https://openalex.org/W6745503460", "https://openalex.org/W6745830540", "https://openalex.org/W6747473740", "https://openalex.org/W6748839928", "https://openalex.org/W6749853933", "https://openalex.org/W6751494529", "https://openalex.org/W6752089545", "https://openalex.org/W6754715805", "https://openalex.org/W6754957883", "https://openalex.org/W6756303580", "https://openalex.org/W6756554778", "https://openalex.org/W6760847586", "https://openalex.org/W6764724164", "https://openalex.org/W6767317771", "https://openalex.org/W6768602481", "https://openalex.org/W6771906678", "https://openalex.org/W6772008794", "https://openalex.org/W6772772403", "https://openalex.org/W6773246137", "https://openalex.org/W6779715229", "https://openalex.org/W6779780899", "https://openalex.org/W6783140480", "https://openalex.org/W6783582377", "https://openalex.org/W6784680951", "https://openalex.org/W6787042371", "https://openalex.org/W6790486821", "https://openalex.org/W6797246669", "https://openalex.org/W6801928877", "https://openalex.org/W6803978876", "https://openalex.org/W6809755281", "https://openalex.org/W6810204147", "https://openalex.org/W6810778491", "https://openalex.org/W6840873881", "https://openalex.org/W6842577639"], "related_works": ["https://openalex.org/W4306904969", "https://openalex.org/W2138720691", "https://openalex.org/W4362501864", "https://openalex.org/W4380318855", "https://openalex.org/W3084456289", "https://openalex.org/W2024136090", "https://openalex.org/W4391331176", "https://openalex.org/W2031695474", "https://openalex.org/W2503553253", "https://openalex.org/W2012090098"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2026, "cited_by_count": 1}, {"year": 2025, "cited_by_count": 8}, {"year": 2024, "cited_by_count": 8}], "updated_date": "2026-02-03T00:53:05.648605", "created_date": "2025-10-10T00:00:00"}, {"id": "https://openalex.org/W3034757316", "doi": null, "title": "Prediction-Guided Multi-Objective Reinforcement Learning for Continuous Robot Control", "display_name": "Prediction-Guided Multi-Objective Reinforcement Learning for Continuous Robot Control", "relevance_score": 179.72397, "publication_year": 2020, "publication_date": "2020-07-12", "ids": {"openalex": "https://openalex.org/W3034757316", "mag": "3034757316"}, "language": "en", "primary_location": {"id": "mag:3034757316", "is_oa": false, "landing_page_url": null, "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": false, "raw_source_name": null, "raw_type": null}, "type": "article", "indexed_in": [], "open_access": {"is_oa": false, "oa_status": "closed", "oa_url": null, "any_repository_has_fulltext": false}, "authorships": [{"author_position": "first", "author": {"id": "https://openalex.org/A5086301079", "display_name": "Jie Xu", "orcid": "https://orcid.org/0000-0003-3510-3387"}, "institutions": [{"id": "https://openalex.org/I63966007", "display_name": "Massachusetts Institute of Technology", "ror": "https://ror.org/042nb2s44", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I63966007"]}], "countries": ["US"], "is_corresponding": true, "raw_author_name": "Jie Xu", "raw_affiliation_strings": ["Massachusetts Institute Of Technology#TAB#"], "raw_orcid": "https://orcid.org/0000-0003-3510-3387", "affiliations": [{"raw_affiliation_string": "Massachusetts Institute Of Technology#TAB#", "institution_ids": ["https://openalex.org/I63966007"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5057506352", "display_name": "Yunsheng Tian", "orcid": "https://orcid.org/0000-0002-6471-7575"}, "institutions": [{"id": "https://openalex.org/I63966007", "display_name": "Massachusetts Institute of Technology", "ror": "https://ror.org/042nb2s44", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I63966007"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Yunsheng Tian", "raw_affiliation_strings": ["Massachusetts Institute Of Technology#TAB#"], "raw_orcid": "https://orcid.org/0000-0002-6471-7575", "affiliations": [{"raw_affiliation_string": "Massachusetts Institute Of Technology#TAB#", "institution_ids": ["https://openalex.org/I63966007"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5102716109", "display_name": "Pingchuan Ma", "orcid": "https://orcid.org/0009-0007-5973-6812"}, "institutions": [{"id": "https://openalex.org/I4210109586", "display_name": "Moscow Institute of Thermal Technology", "ror": "https://ror.org/021es5e59", "country_code": "RU", "type": "facility", "lineage": ["https://openalex.org/I4210109586"]}], "countries": ["RU"], "is_corresponding": false, "raw_author_name": "Pingchuan Ma", "raw_affiliation_strings": ["-MIT"], "raw_orcid": "https://orcid.org/0009-0007-5973-6812", "affiliations": [{"raw_affiliation_string": "-MIT", "institution_ids": ["https://openalex.org/I4210109586"]}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5066830185", "display_name": "Daniela Rus", "orcid": "https://orcid.org/0000-0001-5473-3566"}, "institutions": [], "countries": [], "is_corresponding": false, "raw_author_name": "Daniela Rus", "raw_affiliation_strings": ["MIT - CSAIL"], "raw_orcid": "https://orcid.org/0000-0001-5473-3566", "affiliations": [{"raw_affiliation_string": "MIT - CSAIL", "institution_ids": []}]}, {"author_position": "middle", "author": {"id": "https://openalex.org/A5039112646", "display_name": "Shinjiro Sueda", "orcid": "https://orcid.org/0000-0003-4656-498X"}, "institutions": [{"id": "https://openalex.org/I91045830", "display_name": "Texas A&M University", "ror": "https://ror.org/01f5ytq51", "country_code": "US", "type": "education", "lineage": ["https://openalex.org/I91045830"]}], "countries": ["US"], "is_corresponding": false, "raw_author_name": "Shinjiro Sueda", "raw_affiliation_strings": ["\u2020Texas A&M University"], "raw_orcid": null, "affiliations": [{"raw_affiliation_string": "\u2020Texas A&M University", "institution_ids": ["https://openalex.org/I91045830"]}]}, {"author_position": "last", "author": {"id": "https://openalex.org/A5018010391", "display_name": "Wojciech Matusik", "orcid": "https://orcid.org/0000-0003-0212-5643"}, "institutions": [{"id": "https://openalex.org/I4210109586", "display_name": "Moscow Institute of Thermal Technology", "ror": "https://ror.org/021es5e59", "country_code": "RU", "type": "facility", "lineage": ["https://openalex.org/I4210109586"]}], "countries": ["RU"], "is_corresponding": false, "raw_author_name": "Wojciech Matusik", "raw_affiliation_strings": ["-MIT"], "raw_orcid": "https://orcid.org/0000-0003-0212-5643", "affiliations": [{"raw_affiliation_string": "-MIT", "institution_ids": ["https://openalex.org/I4210109586"]}]}], "institutions": [], "countries_distinct_count": 2, "institutions_distinct_count": 6, "corresponding_author_ids": ["https://openalex.org/A5086301079"], "corresponding_institution_ids": ["https://openalex.org/I63966007"], "apc_list": null, "apc_paid": null, "fwci": 4.769, "has_fulltext": false, "cited_by_count": 44, "citation_normalized_percentile": {"value": 0.95851478, "is_in_top_1_percent": false, "is_in_top_10_percent": true}, "cited_by_percentile_year": {"min": 89, "max": 99}, "biblio": {"volume": "1", "issue": null, "first_page": "10607", "last_page": "10616"}, "is_retracted": false, "is_paratext": false, "is_xpac": false, "primary_topic": {"id": "https://openalex.org/T10848", "display_name": "Advanced Multi-Objective Optimization Algorithms", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, "topics": [{"id": "https://openalex.org/T10848", "display_name": "Advanced Multi-Objective Optimization Algorithms", "score": 0.9980000257492065, "subfield": {"id": "https://openalex.org/subfields/1703", "display_name": "Computational Theory and Mathematics"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10791", "display_name": "Advanced Control Systems Optimization", "score": 0.9979000091552734, "subfield": {"id": "https://openalex.org/subfields/2207", "display_name": "Control and Systems Engineering"}, "field": {"id": "https://openalex.org/fields/22", "display_name": "Engineering"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}, {"id": "https://openalex.org/T10462", "display_name": "Reinforcement Learning in Robotics", "score": 0.996999979019165, "subfield": {"id": "https://openalex.org/subfields/1702", "display_name": "Artificial Intelligence"}, "field": {"id": "https://openalex.org/fields/17", "display_name": "Computer Science"}, "domain": {"id": "https://openalex.org/domains/3", "display_name": "Physical Sciences"}}], "keywords": [{"id": "https://openalex.org/keywords/reinforcement-learning", "display_name": "Reinforcement learning", "score": 0.7899052500724792}, {"id": "https://openalex.org/keywords/computer-science", "display_name": "Computer science", "score": 0.6803296208381653}, {"id": "https://openalex.org/keywords/robot", "display_name": "Robot", "score": 0.521960437297821}, {"id": "https://openalex.org/keywords/artificial-intelligence", "display_name": "Artificial intelligence", "score": 0.44404321908950806}, {"id": "https://openalex.org/keywords/control", "display_name": "Control (management)", "score": 0.4398162066936493}, {"id": "https://openalex.org/keywords/robot-control", "display_name": "Robot control", "score": 0.4293113648891449}, {"id": "https://openalex.org/keywords/machine-learning", "display_name": "Machine learning", "score": 0.3650861084461212}, {"id": "https://openalex.org/keywords/mobile-robot", "display_name": "Mobile robot", "score": 0.27789056301116943}], "concepts": [{"id": "https://openalex.org/C97541855", "wikidata": "https://www.wikidata.org/wiki/Q830687", "display_name": "Reinforcement learning", "level": 2, "score": 0.7899052500724792}, {"id": "https://openalex.org/C41008148", "wikidata": "https://www.wikidata.org/wiki/Q21198", "display_name": "Computer science", "level": 0, "score": 0.6803296208381653}, {"id": "https://openalex.org/C90509273", "wikidata": "https://www.wikidata.org/wiki/Q11012", "display_name": "Robot", "level": 2, "score": 0.521960437297821}, {"id": "https://openalex.org/C154945302", "wikidata": "https://www.wikidata.org/wiki/Q11660", "display_name": "Artificial intelligence", "level": 1, "score": 0.44404321908950806}, {"id": "https://openalex.org/C2775924081", "wikidata": "https://www.wikidata.org/wiki/Q55608371", "display_name": "Control (management)", "level": 2, "score": 0.4398162066936493}, {"id": "https://openalex.org/C65401140", "wikidata": "https://www.wikidata.org/wiki/Q7353385", "display_name": "Robot control", "level": 4, "score": 0.4293113648891449}, {"id": "https://openalex.org/C119857082", "wikidata": "https://www.wikidata.org/wiki/Q2539", "display_name": "Machine learning", "level": 1, "score": 0.3650861084461212}, {"id": "https://openalex.org/C19966478", "wikidata": "https://www.wikidata.org/wiki/Q4810574", "display_name": "Mobile robot", "level": 3, "score": 0.27789056301116943}], "mesh": [], "locations_count": 1, "locations": [{"id": "mag:3034757316", "is_oa": false, "landing_page_url": null, "pdf_url": null, "source": null, "license": null, "license_id": null, "version": null, "is_accepted": false, "is_published": null, "raw_source_name": null, "raw_type": null}], "best_oa_location": null, "sustainable_development_goals": [], "awards": [], "funders": [], "has_content": {"pdf": false, "grobid_xml": false}, "content_urls": null, "referenced_works_count": 12, "referenced_works": ["https://openalex.org/W1988210060", "https://openalex.org/W2097031964", "https://openalex.org/W2106334424", "https://openalex.org/W2117428849", "https://openalex.org/W2126105956", "https://openalex.org/W2143381319", "https://openalex.org/W2158782408", "https://openalex.org/W2187089797", "https://openalex.org/W2207649084", "https://openalex.org/W2811172746", "https://openalex.org/W2892165885", "https://openalex.org/W2990239702"], "related_works": ["https://openalex.org/W2913590264", "https://openalex.org/W3154987973", "https://openalex.org/W2847967271", "https://openalex.org/W3013797926", "https://openalex.org/W2394925858", "https://openalex.org/W2352281164", "https://openalex.org/W2721039077", "https://openalex.org/W3139931703", "https://openalex.org/W2240608089", "https://openalex.org/W2902952786", "https://openalex.org/W3138230400", "https://openalex.org/W2329996960", "https://openalex.org/W2004139394", "https://openalex.org/W2791519870", "https://openalex.org/W2479666524", "https://openalex.org/W2625606797", "https://openalex.org/W3034321388", "https://openalex.org/W3182831080", "https://openalex.org/W2393452615", "https://openalex.org/W2160887092"], "abstract_inverted_index": null, "counts_by_year": [{"year": 2025, "cited_by_count": 3}, {"year": 2024, "cited_by_count": 8}, {"year": 2023, "cited_by_count": 12}, {"year": 2022, "cited_by_count": 10}, {"year": 2021, "cited_by_count": 10}, {"year": 2020, "cited_by_count": 1}], "updated_date": "2025-11-06T04:12:42.849631", "created_date": "2025-10-10T00:00:00"}], "group_by": []}