{"id":"https://openalex.org/W7128715952","doi":"https://doi.org/10.48550/arxiv.2602.10305","title":"Confounding Robust Continuous Control via Automatic Reward Shaping","display_name":"Confounding Robust Continuous Control via Automatic Reward Shaping","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7128715952","doi":"https://doi.org/10.48550/arxiv.2602.10305"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.10305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125735804","display_name":"Mateo Juliani","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Juliani, Mateo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125712488","display_name":"Mingxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mingxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5039620960","display_name":"Elias Bareinboim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bareinboim, Elias","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125735804"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7633000016212463,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7633000016212463,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.06279999762773514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10919","display_name":"Neurological disorders and treatments","score":0.012299999594688416,"subfield":{"id":"https://openalex.org/subfields/2728","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5784000158309937},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5443999767303467},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.46299999952316284},{"id":"https://openalex.org/keywords/confounding","display_name":"Confounding","score":0.38920000195503235},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.3725999891757965},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.3718999922275543},{"id":"https://openalex.org/keywords/robust-control","display_name":"Robust control","score":0.36480000615119934}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6446999907493591},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5784000158309937},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5443999767303467},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.46299999952316284},{"id":"https://openalex.org/C77350462","wikidata":"https://www.wikidata.org/wiki/Q1125472","display_name":"Confounding","level":2,"score":0.38920000195503235},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.3725999891757965},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3718999922275543},{"id":"https://openalex.org/C31531917","wikidata":"https://www.wikidata.org/wiki/Q915157","display_name":"Robust control","level":3,"score":0.36480000615119934},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.323199987411499},{"id":"https://openalex.org/C2982888158","wikidata":"https://www.wikidata.org/wiki/Q5165874","display_name":"Control function","level":3,"score":0.31360000371932983},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.30889999866485596},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C2776902269","wikidata":"https://www.wikidata.org/wiki/Q5165493","display_name":"Continuous monitoring","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C167123822","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automatic control","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.10305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.10305","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10305","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.10305","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reward":[0,89],"shaping":[1,21,42,96,141],"has":[2],"been":[3],"applied":[4],"widely":[5],"to":[6,37,68],"accelerate":[7],"Reinforcement":[8],"Learning":[9],"(RL)":[10],"agents'":[11],"training.":[12],"However,":[13],"a":[14,40,70,123,133],"principled":[15],"way":[16],"of":[17],"designing":[18],"effective":[19],"reward":[20,41,95,140],"functions,":[22],"especially":[23],"for":[24,44,137],"complex":[25],"continuous":[26,45,107,130],"control":[27,46,108,131],"problems,":[28],"remains":[29],"largely":[30],"under-explained.":[31],"In":[32],"this":[33],"work,":[34],"we":[35],"propose":[36],"automatically":[38],"learn":[39,69],"function":[43],"problems":[47],"from":[48,132],"offline":[49],"datasets,":[50],"potentially":[51],"contaminated":[52],"by":[53],"unobserved":[54,116],"confounding":[55,128],"variables.":[56],"Specifically,":[57],"our":[58,120,139],"method":[59],"builds":[60],"upon":[61],"the":[62,75,84,87],"recently":[63],"proposed":[64,94],"causal":[65,134],"Bellman":[66],"equation":[67],"tight":[71],"upper":[72],"bound":[73],"on":[74,103],"optimal":[76],"state":[77],"values,":[78],"which":[79],"is":[80,98],"then":[81],"used":[82,106],"as":[83],"potentials":[85],"in":[86],"Potential-Based":[88],"Shaping":[90],"(PBRS)":[91],"framework.":[92],"Our":[93],"algorithm":[97],"tested":[99],"with":[100],"Soft-Actor-Critic":[101],"(SAC)":[102],"multiple":[104],"commonly":[105],"benchmarks":[109],"and":[110],"exhibits":[111],"strong":[112],"performance":[113],"guarantees":[114],"under":[115],"confounders.":[117],"More":[118],"broadly,":[119],"work":[121],"marks":[122],"solid":[124],"first":[125],"step":[126],"towards":[127],"robust":[129],"perspective.":[135],"Code":[136],"training":[138],"functions":[142],"can":[143],"be":[144],"found":[145],"at":[146],"https://github.com/mateojuliani/confounding_robust_cont_control.":[147]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-13T00:00:00"}
