{"id":"https://openalex.org/W3095435181","doi":"https://doi.org/10.1109/icra48506.2021.9561805","title":"Differentiable Physics Models for Real-world Offline Model-based Reinforcement Learning","display_name":"Differentiable Physics Models for Real-world Offline Model-based Reinforcement Learning","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3095435181","doi":"https://doi.org/10.1109/icra48506.2021.9561805","mag":"3095435181"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.01734","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042151011","display_name":"Michael Lutter","orcid":"https://orcid.org/0000-0002-9019-6769"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Michael Lutter","raw_affiliation_strings":["Technical University of Darmstadt,Computer Science Department"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt,Computer Science Department","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087717746","display_name":"Johannes Silberbauer","orcid":null},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Johannes Silberbauer","raw_affiliation_strings":["Technical University of Darmstadt,Computer Science Department"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt,Computer Science Department","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102799337","display_name":"Joe Watson","orcid":"https://orcid.org/0000-0003-2354-3369"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Joe Watson","raw_affiliation_strings":["Technical University of Darmstadt,Computer Science Department"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt,Computer Science Department","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071367253","display_name":"Jan Peters","orcid":"https://orcid.org/0000-0002-5266-8091"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jan Peters","raw_affiliation_strings":["Technical University of Darmstadt,Computer Science Department"],"affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt,Computer Science Department","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5042151011"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":null,"apc_paid":null,"fwci":0.28,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60882421,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"4163","last_page":"4170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9580000042915344,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7449498176574707},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5325649380683899},{"id":"https://openalex.org/keywords/holonomic","display_name":"Holonomic","score":0.5257911682128906},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.5167802572250366},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5020825862884521},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.48330116271972656},{"id":"https://openalex.org/keywords/physics-engine","display_name":"Physics engine","score":0.45100700855255127},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.44408750534057617},{"id":"https://openalex.org/keywords/system-identification","display_name":"System identification","score":0.42696571350097656},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39931583404541016},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.2004176378250122},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.19461581110954285}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7449498176574707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5325649380683899},{"id":"https://openalex.org/C2777964439","wikidata":"https://www.wikidata.org/wiki/Q5884201","display_name":"Holonomic","level":2,"score":0.5257911682128906},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.5167802572250366},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5020825862884521},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.48330116271972656},{"id":"https://openalex.org/C190390380","wikidata":"https://www.wikidata.org/wiki/Q62505","display_name":"Physics engine","level":2,"score":0.45100700855255127},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44408750534057617},{"id":"https://openalex.org/C119247159","wikidata":"https://www.wikidata.org/wiki/Q1366192","display_name":"System identification","level":3,"score":0.42696571350097656},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39931583404541016},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2004176378250122},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19461581110954285},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra48506.2021.9561805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.01734","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.01734","pdf_url":"https://arxiv.org/pdf/2011.01734","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3095435181","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2011.01734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2011.01734","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.01734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.01734","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.01734","pdf_url":"https://arxiv.org/pdf/2011.01734","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3095435181.pdf","grobid_xml":"https://content.openalex.org/works/W3095435181.grobid-xml"},"referenced_works_count":76,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W618254468","https://openalex.org/W1569654252","https://openalex.org/W1597173708","https://openalex.org/W1949974402","https://openalex.org/W1993300073","https://openalex.org/W1998179438","https://openalex.org/W2001524797","https://openalex.org/W2007864935","https://openalex.org/W2012587148","https://openalex.org/W2064675550","https://openalex.org/W2140801763","https://openalex.org/W2158208985","https://openalex.org/W2158782408","https://openalex.org/W2162977544","https://openalex.org/W2167117957","https://openalex.org/W2539678416","https://openalex.org/W2556096037","https://openalex.org/W2735021678","https://openalex.org/W2804072623","https://openalex.org/W2805883505","https://openalex.org/W2891122218","https://openalex.org/W2911087563","https://openalex.org/W2914688076","https://openalex.org/W2917939519","https://openalex.org/W2946544065","https://openalex.org/W2948443723","https://openalex.org/W2953046228","https://openalex.org/W2953708620","https://openalex.org/W2960705509","https://openalex.org/W2962730452","https://openalex.org/W2963906246","https://openalex.org/W2963960193","https://openalex.org/W2968116426","https://openalex.org/W2970277495","https://openalex.org/W2971162686","https://openalex.org/W2975293371","https://openalex.org/W2978940263","https://openalex.org/W3004137006","https://openalex.org/W3005143466","https://openalex.org/W3007913393","https://openalex.org/W3012294986","https://openalex.org/W3022566517","https://openalex.org/W3029221344","https://openalex.org/W3029529904","https://openalex.org/W3037817062","https://openalex.org/W3038540964","https://openalex.org/W3042772898","https://openalex.org/W3092352130","https://openalex.org/W3094631394","https://openalex.org/W3104994358","https://openalex.org/W3109952375","https://openalex.org/W3208165232","https://openalex.org/W4206215385","https://openalex.org/W4292408540","https://openalex.org/W6653435097","https://openalex.org/W6653548176","https://openalex.org/W6680971464","https://openalex.org/W6751494529","https://openalex.org/W6758539823","https://openalex.org/W6759989956","https://openalex.org/W6763108371","https://openalex.org/W6763340073","https://openalex.org/W6764053384","https://openalex.org/W6764649563","https://openalex.org/W6766161879","https://openalex.org/W6768242542","https://openalex.org/W6768771297","https://openalex.org/W6769017845","https://openalex.org/W6772877411","https://openalex.org/W6773846689","https://openalex.org/W6775127026","https://openalex.org/W6776438516","https://openalex.org/W6784062865","https://openalex.org/W6784392571","https://openalex.org/W6823269322"],"related_works":["https://openalex.org/W3101109908","https://openalex.org/W2946544065","https://openalex.org/W3133031903","https://openalex.org/W3131537992","https://openalex.org/W2962872206","https://openalex.org/W2892053860","https://openalex.org/W2786019934","https://openalex.org/W3183715768","https://openalex.org/W3130076912","https://openalex.org/W2968854004","https://openalex.org/W3048481719","https://openalex.org/W3089483717","https://openalex.org/W3142960102","https://openalex.org/W2198225532","https://openalex.org/W1555368087","https://openalex.org/W2589385607","https://openalex.org/W3094183482","https://openalex.org/W2762453223","https://openalex.org/W2223664760","https://openalex.org/W3104268834"],"abstract_inverted_index":{"A":[0],"limitation":[1],"of":[2,10,31,46,60,114,156],"model-based":[3,73],"reinforcement":[4,74],"learning":[5,75],"(MBRL)":[6],"is":[7,28,91],"the":[8,13,32,43,53,58,71,88,99,147,154],"exploitation":[9],"errors":[11],"in":[12,52,101],"learned":[14],"models.":[15],"Blackbox":[16],"models":[17,36,78,94,124],"can":[18,79,95],"fit":[19],"complex":[20],"dynamics":[21,169],"with":[22,167],"high":[23],"fidelity,":[24],"but":[25,50],"their":[26,47],"behavior":[27],"undefined":[29],"outside":[30],"data":[33,116,145],"distribution.":[34],"Physics-based":[35,93],"are":[37],"better":[38],"at":[39],"extrapolating,":[40],"due":[41,56],"to":[42,57,83,97,136,143,165],"general":[44],"validity":[45],"informed":[48],"structure,":[49],"underfit":[51],"real":[54],"world":[55],"presence":[59],"unmodeled":[61],"phenomena.":[62],"In":[63,150],"this":[64],"work,":[65],"we":[66,152],"demonstrate":[67],"experimentally":[68],"that":[69,122],"for":[70,129],"offline":[72,118],"setting,":[76],"physics-based":[77,148],"be":[80],"beneficial":[81],"compared":[82],"high-capacity":[84],"function":[85],"approximators":[86],"if":[87],"mechanical":[89],"structure":[90],"known.":[92],"learn":[96],"perform":[98],"ball":[100],"a":[102,107],"cup":[103],"(BiC)":[104],"task":[105],"on":[106],"physical":[108],"manipulator":[109],"using":[110,117,170],"only":[111],"4":[112],"minutes":[113],"sampled":[115],"MBRL.":[119],"We":[120],"find":[121],"black-box":[123],"consistently":[125],"produce":[126],"unviable":[127],"policies":[128],"BiC":[130],"as":[131],"all":[132],"predicted":[133],"trajectories":[134],"diverge":[135],"physically":[137],"impossible":[138],"state,":[139],"despite":[140],"having":[141],"access":[142],"more":[144],"than":[146],"model.":[149],"addition,":[151],"generalize":[153],"approach":[155],"physics":[157],"parameter":[158],"identification":[159],"from":[160],"modeling":[161],"holonomic":[162],"multi-body":[163],"systems":[164,166],"nonholonomic":[168],"end-to-end":[171],"automatic":[172],"differentiation.Videos:":[173],"https://sites.google.com/view/ball-in-a-cup-in-4-minutes/":[174]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
