{"id":"https://openalex.org/W2765915777","doi":"https://doi.org/10.1142/s0218001418590097","title":"Single Trajectory Learning: Exploration Versus Exploitation","display_name":"Single Trajectory Learning: Exploration Versus Exploitation","publication_year":2017,"publication_date":"2017-10-19","ids":{"openalex":"https://openalex.org/W2765915777","doi":"https://doi.org/10.1142/s0218001418590097","mag":"2765915777"},"language":"en","primary_location":{"id":"doi:10.1142/s0218001418590097","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001418590097","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101997111","display_name":"Qiming Fu","orcid":"https://orcid.org/0000-0002-8720-9071"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiming Fu","raw_affiliation_strings":["College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100666897","display_name":"Quan Liu","orcid":"https://orcid.org/0000-0002-0941-8767"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]},{"id":"https://openalex.org/I4210159340","display_name":"The Synergetic Innovation Center for Advanced Materials","ror":"https://ror.org/05nzc1r88","country_code":"CN","type":"facility","lineage":["https://openalex.org/I134687103","https://openalex.org/I41198531","https://openalex.org/I4210159340","https://openalex.org/I76130692","https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Liu","raw_affiliation_strings":["Collaborative Innovation Center of Novel Software, Technology and Industrialization Nanjing, Jiangsu 210000, P. R. China","Key Laboratory of Symbolic Computation and Knowledge, Engineering of Ministry of Education, Jilin University, Changchun, Jilin 130012, P. R. China","School of Computer Science and Technology, Soochow University, Suzhou 215000, Jiangsu, P. R. China"],"affiliations":[{"raw_affiliation_string":"Collaborative Innovation Center of Novel Software, Technology and Industrialization Nanjing, Jiangsu 210000, P. R. China","institution_ids":["https://openalex.org/I4210159340"]},{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge, Engineering of Ministry of Education, Jilin University, Changchun, Jilin 130012, P. R. China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"School of Computer Science and Technology, Soochow University, Suzhou 215000, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101902909","display_name":"Shan Zhong","orcid":"https://orcid.org/0000-0003-0034-6952"},"institutions":[{"id":"https://openalex.org/I21741975","display_name":"Changshu Institute of Technology","ror":"https://ror.org/05g6ben79","country_code":"CN","type":"education","lineage":["https://openalex.org/I21741975"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shan Zhong","raw_affiliation_strings":["School of Computer Science and Technology, Changshu Institute of Technology, Suzhou, Jiangsu 215500, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Changshu Institute of Technology, Suzhou, Jiangsu 215500, P. R. China","institution_ids":["https://openalex.org/I21741975"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101498950","display_name":"Heng Luo","orcid":"https://orcid.org/0000-0003-2340-6241"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Luo","raw_affiliation_strings":["College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076525932","display_name":"Hongjie Wu","orcid":"https://orcid.org/0000-0001-5921-8707"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjie Wu","raw_affiliation_strings":["College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100437534","display_name":"Jianping Chen","orcid":"https://orcid.org/0000-0002-2109-5761"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianping Chen","raw_affiliation_strings":["College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Suzhou University of Science and Technology, Suzhou 215000, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Jiangsu Province Key Laboratory of Intelligent Building Energy Efficiency, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]},{"raw_affiliation_string":"Suzhou Key Laboratory of Mobile Network Technology and Application, Suzhou University of Science and Technology, Suzhou 215009, Jiangsu, P. R. China","institution_ids":["https://openalex.org/I308837"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101997111"],"corresponding_institution_ids":["https://openalex.org/I308837"],"apc_list":null,"apc_paid":null,"fwci":0.39,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7123989,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"32","issue":"06","first_page":"1859009","last_page":"1859009"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.965499997138977,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.8354365825653076},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7321284413337708},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7214258909225464},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5928712487220764},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5725575089454651},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5335875749588013},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.525375247001648},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5202966332435608},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.47958651185035706},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46879658102989197},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4357791244983673},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43156522512435913},{"id":"https://openalex.org/keywords/empirical-distribution-function","display_name":"Empirical distribution function","score":0.4153686761856079},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18317541480064392},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1792874038219452}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.8354365825653076},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7321284413337708},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7214258909225464},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5928712487220764},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5725575089454651},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5335875749588013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.525375247001648},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5202966332435608},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.47958651185035706},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46879658102989197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4357791244983673},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43156522512435913},{"id":"https://openalex.org/C98385598","wikidata":"https://www.wikidata.org/wiki/Q1339385","display_name":"Empirical distribution function","level":2,"score":0.4153686761856079},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18317541480064392},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1792874038219452},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218001418590097","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001418590097","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1506859583","https://openalex.org/W1515933446","https://openalex.org/W2121616705","https://openalex.org/W2122162550","https://openalex.org/W2126848223","https://openalex.org/W2236244207","https://openalex.org/W2331039021","https://openalex.org/W2963301691"],"related_works":["https://openalex.org/W4376155396","https://openalex.org/W2971351794","https://openalex.org/W1947085858","https://openalex.org/W2101991911","https://openalex.org/W2174986909","https://openalex.org/W2527791220","https://openalex.org/W2155070487","https://openalex.org/W4311589891","https://openalex.org/W3123835761","https://openalex.org/W4292701710"],"abstract_inverted_index":{"In":[0,127],"reinforcement":[1],"learning":[2],"(RL),":[3],"the":[4,20,23,31,34,39,45,68,74,87,98,133,139,152,159,173],"exploration/exploitation":[5],"(E/E)":[6],"dilemma":[7],"is":[8,53,94,116],"a":[9,56,81,104,109],"very":[10],"crucial":[11],"issue,":[12],"which":[13,115],"can":[14],"be":[15],"described":[16],"as":[17],"searching":[18],"between":[19],"exploration":[21],"of":[22,33,112,125,158],"environment":[24],"to":[25,65,79,85],"find":[26,80],"more":[27],"profitable":[28],"actions,":[29],"and":[30,63,146,154],"exploitation":[32],"best":[35,99],"empirical":[36],"actions":[37],"for":[38],"current":[40],"state.":[41],"We":[42,137,150,168],"focus":[43],"on":[44,143],"single":[46,61,120],"trajectory":[47],"RL":[48,176],"problem":[49],"where":[50],"an":[51,163],"agent":[52],"interacting":[54],"with":[55,67,172],"partially":[57],"unknown":[58],"MDP":[59,91,106,165],"over":[60,103],"trajectories,":[62],"try":[64,78],"deal":[66],"E/E":[69,83],"in":[70,101],"this":[71,128],"setting.":[72],"Given":[73],"reward":[75],"function,":[76],"we":[77,130],"good":[82],"strategy":[84,100,161],"address":[86],"MDPs":[88],"under":[89,162],"some":[90],"distribution.":[92,166],"This":[93],"achieved":[95],"by":[96,118],"selecting":[97],"mean":[102],"potential":[105],"distribution":[107],"from":[108,123],"large":[110],"set":[111,145],"candidate":[113],"strategies,":[114],"done":[117],"exploiting":[119],"trajectories":[121],"drawn":[122],"plenty":[124],"MDPs.":[126],"paper,":[129],"mainly":[131],"make":[132],"following":[134],"contributions:":[135],"(1)":[136],"discuss":[138],"strategy-selector":[140],"algorithm":[141],"based":[142],"formula":[144],"polynomial":[147],"function.":[148],"(2)":[149],"provide":[151],"theoretical":[153],"experimental":[155],"regret":[156],"analysis":[157],"learned":[160],"given":[164],"(3)":[167],"compare":[169],"these":[170],"methods":[171],"\u201cstate-of-the-art\u201d":[174],"Bayesian":[175],"method":[177],"experimentally.":[178]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
