{"id":"https://openalex.org/W4375959112","doi":"https://doi.org/10.48550/arxiv.2305.04361","title":"Truncating Trajectories in Monte Carlo Reinforcement Learning","display_name":"Truncating Trajectories in Monte Carlo Reinforcement Learning","publication_year":2023,"publication_date":"2023-05-07","ids":{"openalex":"https://openalex.org/W4375959112","doi":"https://doi.org/10.48550/arxiv.2305.04361"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2305.04361","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.04361","pdf_url":"https://arxiv.org/pdf/2305.04361","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.04361","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081734498","display_name":"Riccardo Poiani","orcid":"https://orcid.org/0000-0002-4026-5623"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Poiani, Riccardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037963323","display_name":"Alberto Maria Metelli","orcid":"https://orcid.org/0000-0002-3424-5212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Metelli, Alberto Maria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017130830","display_name":"Marcello Restelli","orcid":"https://orcid.org/0000-0002-6322-1076"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Restelli, Marcello","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9718000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9718000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9333999752998352,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7590278387069702},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6904730796813965},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.6896381974220276},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.6407715082168579},{"id":"https://openalex.org/keywords/truncation","display_name":"Truncation (statistics)","score":0.6356136798858643},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.5756022334098816},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5647109150886536},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23957982659339905},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2341134250164032},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1780894696712494},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13550344109535217}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7590278387069702},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6904730796813965},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.6896381974220276},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.6407715082168579},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.6356136798858643},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.5756022334098816},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5647109150886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23957982659339905},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2341134250164032},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1780894696712494},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13550344109535217},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2305.04361","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.04361","pdf_url":"https://arxiv.org/pdf/2305.04361","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2305.04361","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2305.04361","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.04361","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.04361","pdf_url":"https://arxiv.org/pdf/2305.04361","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4375959112.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1974935850","https://openalex.org/W2074350688","https://openalex.org/W2364252372","https://openalex.org/W4234066492","https://openalex.org/W2954004777","https://openalex.org/W4321237755","https://openalex.org/W2351788959","https://openalex.org/W2226630547","https://openalex.org/W4248272417","https://openalex.org/W4250457231"],"abstract_inverted_index":{"In":[0,27],"Reinforcement":[1],"Learning":[2],"(RL),":[3],"an":[4,8,19,102,191],"agent":[5,39],"acts":[6],"in":[7,29,81,97,199],"unknown":[9],"environment":[10],"to":[11,109,157],"maximize":[12],"the":[13,24,38,60,64,74,78,110,124,127,131,135,143,181,195],"expected":[14,25,136],"cumulative":[15],"discounted":[16,61],"sum":[17],"of":[18,32,48,63,112,114,126,134,138,146,152,194],"external":[20],"reward":[21],"signal,":[22],"i.e.,":[23,117],"return.":[26],"practice,":[28],"many":[30],"tasks":[31],"interest,":[33],"such":[34],"as":[35],"policy":[36],"optimization,":[37],"usually":[40],"spends":[41],"its":[42],"interaction":[43],"budget":[44,104],"by":[45],"collecting":[46],"episodes":[47],"fixed":[49],"length":[50],"within":[51],"a":[52,92,139,173],"simulator":[53],"(i.e.,":[54],"Monte":[55],"Carlo":[56],"simulation).":[57],"However,":[58],"given":[59],"nature":[62],"RL":[65],"objective,":[66],"this":[67,95,98],"data":[68],"collection":[69,111],"strategy":[70,106],"might":[71],"not":[72],"be":[73],"best":[75],"option.":[76],"Indeed,":[77],"rewards":[79],"taken":[80],"early":[82],"simulation":[83],"steps":[84],"weigh":[85],"exponentially":[86],"more":[87],"than":[88],"future":[89],"rewards.":[90],"Taking":[91],"cue":[93],"from":[94],"intuition,":[96],"paper,":[99],"we":[100,149,171],"design":[101],"a-priori":[103],"allocation":[105],"that":[107,190],"leads":[108],"trajectories":[113,196],"different":[115],"lengths,":[116],"truncated.":[118],"The":[119],"proposed":[120],"approach":[121],"provably":[122],"minimizes":[123],"width":[125],"confidence":[128],"intervals":[129],"around":[130],"empirical":[132],"estimates":[133],"return":[137],"policy.":[140],"After":[141],"discussing":[142],"theoretical":[144],"properties":[145],"our":[147,153,177,186],"method,":[148],"make":[150],"use":[151],"trajectory":[154],"truncation":[155,193],"mechanism":[156],"extend":[158],"Policy":[159],"Optimization":[160],"via":[161],"Importance":[162],"Sampling":[163],"(POIS,":[164],"Metelli":[165],"et":[166],"al.,":[167],"2018)":[168],"algorithm.":[169],"Finally,":[170],"conduct":[172],"numerical":[174],"comparison":[175],"between":[176],"algorithm":[178],"and":[179,188],"POIS:":[180],"results":[182],"are":[183],"consistent":[184],"with":[185],"theory":[187],"show":[189],"appropriate":[192],"can":[197],"succeed":[198],"improving":[200],"performance.":[201]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
