{"id":"https://openalex.org/W4387081555","doi":"https://doi.org/10.1007/s10994-023-06392-z","title":"Model-based trajectory stitching for improved behavioural cloning and its applications","display_name":"Model-based trajectory stitching for improved behavioural cloning and its applications","publication_year":2023,"publication_date":"2023-09-27","ids":{"openalex":"https://openalex.org/W4387081555","doi":"https://doi.org/10.1007/s10994-023-06392-z"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-023-06392-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06392-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06392-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06392-z.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006965239","display_name":"Charles A. Hepburn","orcid":"https://orcid.org/0000-0001-7731-564X"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Charles A. Hepburn","raw_affiliation_strings":["Mathematics Institute, University of Warwick, Coventry, England"],"raw_orcid":"https://orcid.org/0000-0001-7731-564X","affiliations":[{"raw_affiliation_string":"Mathematics Institute, University of Warwick, Coventry, England","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010581004","display_name":"Giovanni Montana","orcid":"https://orcid.org/0000-0003-3942-3900"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]},{"id":"https://openalex.org/I4210128584","display_name":"The Alan Turing Institute","ror":"https://ror.org/035dkdb55","country_code":"GB","type":"facility","lineage":["https://openalex.org/I4210128584"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Giovanni Montana","raw_affiliation_strings":["Alan Turing Institute, London, England","Department of Statistics, University of Warwick, Coventry, England","WMG, University of Warwick, Coventry, England"],"raw_orcid":"https://orcid.org/0000-0003-3942-3900","affiliations":[{"raw_affiliation_string":"Alan Turing Institute, London, England","institution_ids":["https://openalex.org/I4210128584"]},{"raw_affiliation_string":"Department of Statistics, University of Warwick, Coventry, England","institution_ids":["https://openalex.org/I39555362"]},{"raw_affiliation_string":"WMG, University of Warwick, Coventry, England","institution_ids":["https://openalex.org/I39555362"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5010581004"],"corresponding_institution_ids":["https://openalex.org/I39555362","https://openalex.org/I4210128584"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":1.1392,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82761691,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"113","issue":"2","first_page":"647","last_page":"674"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7186697721481323},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7085601091384888},{"id":"https://openalex.org/keywords/image-stitching","display_name":"Image stitching","score":0.6901119947433472},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.656714141368866},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5962412357330322},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.585476815700531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5586954951286316},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5154324769973755},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.5104042887687683},{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.46994784474372864},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.42297613620758057},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4146682918071747}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7186697721481323},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7085601091384888},{"id":"https://openalex.org/C29081049","wikidata":"https://www.wikidata.org/wiki/Q1364242","display_name":"Image stitching","level":2,"score":0.6901119947433472},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.656714141368866},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5962412357330322},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.585476815700531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5586954951286316},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5154324769973755},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.5104042887687683},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.46994784474372864},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.42297613620758057},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4146682918071747},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-023-06392-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06392-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06392-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-023-06392-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-023-06392-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-023-06392-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7599999904632568}],"awards":[{"id":"https://openalex.org/G1854821999","display_name":null,"funder_award_id":"EP/S022244/1","funder_id":"https://openalex.org/F4320320279","funder_display_name":"University of Warwick"},{"id":"https://openalex.org/G2026327645","display_name":"EPSRC Centre for Doctoral Training in Mathematics for Real-World Systems II","funder_award_id":"EP/S022244/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4160449099","display_name":null,"funder_award_id":"EP/V024868/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8871338198","display_name":null,"funder_award_id":"EP/S022244/1","funder_id":"https://openalex.org/F4320314731","funder_display_name":"UK Research and Innovation"}],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320320279","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387081555.pdf"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W176827812","https://openalex.org/W192920577","https://openalex.org/W1511887321","https://openalex.org/W1515897776","https://openalex.org/W1931877416","https://openalex.org/W1948727931","https://openalex.org/W1965555277","https://openalex.org/W1980035368","https://openalex.org/W2030116982","https://openalex.org/W2051228319","https://openalex.org/W2167224731","https://openalex.org/W2188365844","https://openalex.org/W2290104316","https://openalex.org/W2296673577","https://openalex.org/W2604382266","https://openalex.org/W2739748921","https://openalex.org/W2774354230","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2788862220","https://openalex.org/W2805762288","https://openalex.org/W2810754397","https://openalex.org/W2892110489","https://openalex.org/W2904453761","https://openalex.org/W2947150733","https://openalex.org/W2950624398","https://openalex.org/W2962872206","https://openalex.org/W2963099939","https://openalex.org/W2963411833","https://openalex.org/W2978612180","https://openalex.org/W2982041329","https://openalex.org/W2997101648","https://openalex.org/W3009593063","https://openalex.org/W3025606523","https://openalex.org/W3033324992","https://openalex.org/W3130177876","https://openalex.org/W3135807039","https://openalex.org/W3136208045","https://openalex.org/W3155733121","https://openalex.org/W3172360140","https://openalex.org/W3182474098","https://openalex.org/W3203827806","https://openalex.org/W4212774754","https://openalex.org/W4214717370","https://openalex.org/W4285604474","https://openalex.org/W4387081555","https://openalex.org/W6635516443","https://openalex.org/W6638018090","https://openalex.org/W6678511256","https://openalex.org/W6718092244","https://openalex.org/W6748972340","https://openalex.org/W6777656069","https://openalex.org/W6803550015"],"related_works":["https://openalex.org/W2593649365","https://openalex.org/W2950577464","https://openalex.org/W2979211489","https://openalex.org/W4381245711","https://openalex.org/W4302612983","https://openalex.org/W2485944590","https://openalex.org/W4287077734","https://openalex.org/W3170111948","https://openalex.org/W4300037694","https://openalex.org/W4379260571"],"abstract_inverted_index":{"Abstract":[0],"Behavioural":[1],"cloning":[2],"(BC)":[3],"is":[4,27,65],"a":[5,13,43,174],"commonly":[6],"used":[7],"imitation":[8],"learning":[9,48,237],"method":[10],"to":[11,54,66,79,82,95,105,160,165,172],"infer":[12],"sequential":[14],"decision-making":[15],"policy":[16,33,74,77,247],"from":[17,58,213],"expert":[18],"demonstrations.":[19],"However,":[20],"when":[21],"the":[22,25,30,52,83,97,100,113,144,169,180,192,214,219],"quality":[23,98],"of":[24,99,131,138,168,183],"data":[26,102,118,146],"not":[28],"optimal,":[29],"resulting":[31],"behavioural":[32,107,194],"also":[34],"performs":[35],"sub-optimally":[36],"once":[37],"deployed.":[38],"Recently,":[39],"there":[40],"has":[41],"been":[42],"surge":[44],"in":[45,112,143],"offline":[46,93,236,243],"reinforcement":[47],"methods":[49],"that":[50,140,179,200,225],"hold":[51],"promise":[53],"extract":[55],"high-quality":[56],"policies":[57,108,211],"sub-optimal":[59],"historical":[60],"data.":[61,85,216],"A":[62],"common":[63],"approach":[64,94,120],"perform":[67],"regularisation":[68],"during":[69,73],"training,":[70],"encouraging":[71],"updates":[72],"evaluation":[75],"and/or":[76],"improvement":[78,119],"stay":[80],"close":[81],"underlying":[84,193],"In":[86],"this":[87],"work,":[88],"we":[89,223],"investigate":[90],"whether":[91],"an":[92],"improving":[96],"existing":[101,235],"can":[103,204],"lead":[104],"improved":[106],"without":[109],"any":[110],"changes":[111],"BC":[114,210],"algorithm.":[115],"The":[116],"proposed":[117],"-":[121,126],"Model-Based":[122],"Trajectory":[123],"Stitching":[124],"(MBTS)":[125],"generates":[127],"new":[128,151,156,188],"trajectories":[129,186],"(sequences":[130],"states":[132,139],"and":[133,147,171,246],"actions)":[134],"by":[135,230],"\u2018stitching\u2019":[136],"pairs":[137],"were":[141],"disconnected":[142],"original":[145,215],"generating":[148],"their":[149],"connecting":[150],"action.":[152],"By":[153],"construction,":[154],"these":[155],"transitions":[157],"are":[158,228],"guaranteed":[159],"be":[161,205],"highly":[162],"plausible":[163],"according":[164],"probabilistic":[166],"models":[167],"environment,":[170],"improve":[173],"state-value":[175],"function.":[176],"We":[177],"demonstrate":[178,224],"iterative":[181],"process":[182],"replacing":[184],"old":[185],"with":[187,233],"ones":[189],"incrementally":[190],"improves":[191],"policy.":[195],"Extensive":[196],"experimental":[197],"results":[198,227],"show":[199],"significant":[201],"performance":[202],"gains":[203],"achieved":[206],"using":[207,218],"MBTS":[208,232],"over":[209],"extracted":[212],"Furthermore,":[217],"D4RL":[220],"benchmarking":[221],"suite,":[222],"state-of-the-art":[226],"obtained":[229],"combining":[231],"two":[234],"methodologies":[238],"reliant":[239],"on":[240],"BC,":[241],"model-based":[242],"planning":[244],"(MBOP)":[245],"constraint":[248],"(TD3+BC).":[249]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
