{"id":"https://openalex.org/W2592771977","doi":"https://doi.org/10.3233/idt-170285","title":"Small-sample reinforcement learning: Improving policies using synthetic data1","display_name":"Small-sample reinforcement learning: Improving policies using synthetic data1","publication_year":2017,"publication_date":"2017-03-06","ids":{"openalex":"https://openalex.org/W2592771977","doi":"https://doi.org/10.3233/idt-170285","mag":"2592771977"},"language":"en","primary_location":{"id":"doi:10.3233/idt-170285","is_oa":false,"landing_page_url":"https://doi.org/10.3233/idt-170285","pdf_url":null,"source":{"id":"https://openalex.org/S119727669","display_name":"Intelligent Decision Technologies","issn_l":"1872-4981","issn":["1872-4981","1875-8843"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Decision Technologies","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027945041","display_name":"Stephen W. Carden","orcid":"https://orcid.org/0000-0002-4793-7883"},"institutions":[{"id":"https://openalex.org/I39815113","display_name":"Georgia Southern University","ror":"https://ror.org/04agmb972","country_code":"US","type":"education","lineage":["https://openalex.org/I39815113"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Stephen W. Carden","raw_affiliation_strings":["Department of Mathematical Sciences, Georgia Southern University, Statesboro, GA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mathematical Sciences, Georgia Southern University, Statesboro, GA, USA","institution_ids":["https://openalex.org/I39815113"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049873324","display_name":"James Livsey","orcid":"https://orcid.org/0000-0002-4033-4828"},"institutions":[{"id":"https://openalex.org/I1333512998","display_name":"United States Census Bureau","ror":"https://ror.org/01qn7cs15","country_code":"US","type":"funder","lineage":["https://openalex.org/I1333512998","https://openalex.org/I1343035065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Livsey","raw_affiliation_strings":["Center for Statistical Research and Methodology, U.S. Census Bureau, Washington, DC, USA"],"affiliations":[{"raw_affiliation_string":"Center for Statistical Research and Methodology, U.S. Census Bureau, Washington, DC, USA","institution_ids":["https://openalex.org/I1333512998"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5027945041"],"corresponding_institution_ids":["https://openalex.org/I39815113"],"apc_list":null,"apc_paid":null,"fwci":0.195,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.59083344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"11","issue":"2","first_page":"167","last_page":"175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8901627063751221},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6274247765541077},{"id":"https://openalex.org/keywords/nonparametric-statistics","display_name":"Nonparametric statistics","score":0.5802301168441772},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5730485320091248},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.542752742767334},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5403352975845337},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5296813249588013},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5020561218261719},{"id":"https://openalex.org/keywords/norm","display_name":"Norm (philosophy)","score":0.4412389397621155},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.18079522252082825},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16151979565620422}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8901627063751221},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6274247765541077},{"id":"https://openalex.org/C102366305","wikidata":"https://www.wikidata.org/wiki/Q1097688","display_name":"Nonparametric statistics","level":2,"score":0.5802301168441772},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5730485320091248},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.542752742767334},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5403352975845337},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5296813249588013},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5020561218261719},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.4412389397621155},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.18079522252082825},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16151979565620422},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/idt-170285","is_oa":false,"landing_page_url":"https://doi.org/10.3233/idt-170285","pdf_url":null,"source":{"id":"https://openalex.org/S119727669","display_name":"Intelligent Decision Technologies","issn_l":"1872-4981","issn":["1872-4981","1875-8843"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Decision Technologies","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W15601695","https://openalex.org/W107583932","https://openalex.org/W1493327649","https://openalex.org/W1512919909","https://openalex.org/W1576516403","https://openalex.org/W1844343792","https://openalex.org/W1971713783","https://openalex.org/W1977405703","https://openalex.org/W1984819428","https://openalex.org/W1991513691","https://openalex.org/W2027795129","https://openalex.org/W2107480868","https://openalex.org/W2120346334","https://openalex.org/W2136283380","https://openalex.org/W2160279936","https://openalex.org/W2211925278","https://openalex.org/W2266890668","https://openalex.org/W2334782222","https://openalex.org/W2963891150","https://openalex.org/W3011120880","https://openalex.org/W4214717370","https://openalex.org/W6600624752","https://openalex.org/W6677737365","https://openalex.org/W6688384279"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W2280377497","https://openalex.org/W4387506531","https://openalex.org/W4238433571","https://openalex.org/W3174044702","https://openalex.org/W2967848559","https://openalex.org/W4283803360"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,7,86],"(RL)":[2],"concerns":[3],"algorithms":[4],"tasked":[5],"with":[6,13,132],"optimal":[8,87],"control":[9],"policies":[10,88],"by":[11,89],"interacting":[12],"or":[14],"observing":[15],"a":[16,42,82,117,124,130],"system.":[17],"In":[18],"computer":[19],"science":[20],"and":[21],"other":[22],"fields":[23],"in":[24,54,58,67],"which":[25,106],"RL":[26,47],"originated,":[27],"large":[28],"sample":[29,61],"sizes":[30],"are":[31,65,70],"the":[32,91,96,120],"norm,":[33],"because":[34],"data":[35,76,108],"can":[36,109],"be":[37,110],"generated":[38],"at":[39],"will":[40],"from":[41,105],"generative":[43,103],"model.":[44],"Recen":[45],"tly,":[46],"methods":[48,64],"have":[49],"been":[50],"adapted":[51],"for":[52,85,128],"use":[53],"clinical":[55],"trials,":[56],"resulting":[57],"much":[59],"smaller":[60],"sizes.":[62],"Nonparametric":[63],"common":[66],"RL,":[68],"but":[69],"likely":[71],"to":[72,116],"over-generalize":[73],"when":[74],"limited":[75],"is":[77,114],"available.":[78],"This":[79],"paper":[80],"proposes":[81],"novel":[83],"methodology":[84],"leveraging":[90],"researcher's":[92],"partial":[93],"knowledge":[94],"about":[95],"probability":[97],"transition":[98],"structure":[99],"into":[100],"an":[101],"approximate":[102],"model":[104],"synthetic":[107],"produced.":[111],"Our":[112],"method":[113],"applied":[115],"scenario":[118],"where":[119],"researcher":[121],"must":[122],"create":[123],"medical":[125],"prescription":[126],"policy":[127],"managing":[129],"disease":[131],"sporadically":[133],"appearing":[134],"symptoms.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
