{"id":"https://openalex.org/W4387171559","doi":"https://doi.org/10.3233/faia230618","title":"Uncertainty-Driven Trajectory Truncation for Data Augmentation in Offline Reinforcement Learning","display_name":"Uncertainty-Driven Trajectory Truncation for Data Augmentation in Offline Reinforcement Learning","publication_year":2023,"publication_date":"2023-09-28","ids":{"openalex":"https://openalex.org/W4387171559","doi":"https://doi.org/10.3233/faia230618"},"language":"en","primary_location":{"id":"doi:10.3233/faia230618","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia230618","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230618","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230618","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100343044","display_name":"Junjie Zhang","orcid":"https://orcid.org/0000-0002-0033-0494"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Zhang","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054042368","display_name":"Jiafei Lyu","orcid":"https://orcid.org/0000-0002-0842-7151"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiafei Lyu","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008825234","display_name":"Xiaoteng Ma","orcid":"https://orcid.org/0000-0003-4806-6458"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoteng Ma","raw_affiliation_strings":["Department of Automation, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013716631","display_name":"Jiangpeng Yan","orcid":"https://orcid.org/0000-0002-0767-1726"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangpeng Yan","raw_affiliation_strings":["Department of Automation, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042474689","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0002-9401-3528"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Department of Automation, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104145590","display_name":"Le Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Le Wan","raw_affiliation_strings":["IEG, Tencent"],"affiliations":[{"raw_affiliation_string":"IEG, Tencent","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100754504","display_name":"Xiu Li","orcid":"https://orcid.org/0000-0003-0403-1923"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiu Li","raw_affiliation_strings":["Tsinghua Shenzhen International Graduate School, Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua Shenzhen International Graduate School, Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100754504"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.0413,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.78767576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9120000004768372,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7815108299255371},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7539511919021606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7096632719039917},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6933952569961548},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.6612427830696106},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5791893601417542},{"id":"https://openalex.org/keywords/truncation","display_name":"Truncation (statistics)","score":0.5636012554168701},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4571291506290436},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.4210788905620575},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.15471842885017395}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7815108299255371},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7539511919021606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7096632719039917},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6933952569961548},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.6612427830696106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5791893601417542},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.5636012554168701},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4571291506290436},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.4210788905620575},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.15471842885017395},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia230618","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia230618","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230618","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia230618","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia230618","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA230618","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387171559.pdf","grobid_xml":"https://content.openalex.org/works/W4387171559.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W192920577","https://openalex.org/W1959608418","https://openalex.org/W2125389028","https://openalex.org/W2575705757","https://openalex.org/W2583993537","https://openalex.org/W2904453761","https://openalex.org/W2936304709","https://openalex.org/W2947150733","https://openalex.org/W2950624398","https://openalex.org/W2991355586","https://openalex.org/W2993185773","https://openalex.org/W3016525976","https://openalex.org/W3025606523","https://openalex.org/W3028766998","https://openalex.org/W3033324992","https://openalex.org/W3034084488","https://openalex.org/W3036167779","https://openalex.org/W3048481719","https://openalex.org/W3092490845","https://openalex.org/W3100944043","https://openalex.org/W3101483449","https://openalex.org/W3122329924","https://openalex.org/W3130177876","https://openalex.org/W3133826166","https://openalex.org/W3136208045","https://openalex.org/W3162450516","https://openalex.org/W3165994454","https://openalex.org/W3166795773","https://openalex.org/W3167222229","https://openalex.org/W3169291081","https://openalex.org/W3170016383","https://openalex.org/W3170059879","https://openalex.org/W3172360140","https://openalex.org/W3178225342","https://openalex.org/W3179631121","https://openalex.org/W3184258323","https://openalex.org/W3200377072","https://openalex.org/W3201700917","https://openalex.org/W3202125656","https://openalex.org/W3203827806","https://openalex.org/W3205279445","https://openalex.org/W3205794883","https://openalex.org/W4221158443","https://openalex.org/W4225110331","https://openalex.org/W4225623389","https://openalex.org/W4282813201","https://openalex.org/W4283076713","https://openalex.org/W4285604474","https://openalex.org/W4287080123","https://openalex.org/W4287118283","https://openalex.org/W4287278746","https://openalex.org/W4287689949","https://openalex.org/W4288319859","https://openalex.org/W4301430420","https://openalex.org/W4306177351","https://openalex.org/W4306295204"],"related_works":["https://openalex.org/W3153007185","https://openalex.org/W4225619808","https://openalex.org/W2983785000","https://openalex.org/W3212439828","https://openalex.org/W4319083788","https://openalex.org/W3131920644","https://openalex.org/W3037024314","https://openalex.org/W4385682020","https://openalex.org/W4297792190","https://openalex.org/W3016525976"],"abstract_inverted_index":{"Equipped":[0],"with":[1,24,68,110,124],"the":[2,35,39,53,57,74,78,82,90,102,136],"trained":[3,40],"environmental":[4],"dynamics,":[5],"model-based":[6,113],"offline":[7,114,128],"reinforcement":[8],"learning":[9],"(RL)":[10],"algorithms":[11],"can":[12,30],"often":[13,146],"successfully":[14],"learn":[15],"good":[16],"policies":[17],"from":[18,38],"fixed-sized":[19],"datasets,":[20],"even":[21],"some":[22,46],"datasets":[23],"poor":[25],"quality.":[26],"Unfortunately,":[27],"however,":[28],"it":[29,109],"not":[31],"be":[32],"guaranteed":[33],"that":[34,140],"generated":[36],"samples":[37,48],"dynamics":[41],"model":[42],"are":[43],"reliable":[44],"(e.g.,":[45],"synthetic":[47,75],"may":[49],"lie":[50],"outside":[51],"of":[52,56,93,104],"support":[54],"region":[55],"static":[58],"dataset).":[59],"To":[60,99],"address":[61],"this":[62],"issue,":[63],"we":[64,106,121],"propose":[65],"Trajectory":[66],"Truncation":[67],"Uncertainty":[69],"(TATU),":[70],"which":[71],"adaptively":[72],"truncates":[73],"trajectory":[76,83],"if":[77],"accumulated":[79],"uncertainty":[80],"along":[81],"is":[84,152],"too":[85],"large.":[86],"We":[87],"theoretically":[88],"show":[89,101,139],"performance":[91],"bound":[92],"TATU":[94,123,141],"to":[95],"justify":[96],"its":[97],"benefits.":[98],"empirically":[100],"advantages":[103],"TATU,":[105],"first":[107],"combine":[108],"two":[111],"classical":[112],"RL":[115,129],"algorithms,":[116,130],"MOPO":[117],"and":[118],"COMBO.":[119],"Furthermore,":[120],"integrate":[122],"several":[125],"off-the-shelf":[126],"model-free":[127],"e.g.,":[131],"BCQ.":[132],"Experimental":[133],"results":[134],"on":[135],"D4RL":[137],"benchmark":[138],"significantly":[142],"improves":[143],"their":[144],"performance,":[145],"by":[147],"a":[148],"large":[149],"margin.":[150],"Code":[151],"available":[153],"here.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
