{"id":"https://openalex.org/W4410295167","doi":"https://doi.org/10.1109/tit.2025.3569421","title":"Asymptotic Analysis of Sample-Averaged Q-Learning","display_name":"Asymptotic Analysis of Sample-Averaged Q-Learning","publication_year":2025,"publication_date":"2025-05-12","ids":{"openalex":"https://openalex.org/W4410295167","doi":"https://doi.org/10.1109/tit.2025.3569421"},"language":"en","primary_location":{"id":"doi:10.1109/tit.2025.3569421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2025.3569421","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102665106","display_name":"Saunak Kumar Panda","orcid":null},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Saunak Kumar Panda","raw_affiliation_strings":["Department of Industrial and Systems Engineering, University of Houston, Houston, TX, USA","University of Houston, Houston, TX, USA"],"raw_orcid":"https://orcid.org/0009-0008-2399-2387","affiliations":[{"raw_affiliation_string":"Department of Industrial and Systems Engineering, University of Houston, Houston, TX, USA","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"University of Houston, Houston, TX, USA","institution_ids":["https://openalex.org/I44461941"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052242880","display_name":"Ruiqi Liu","orcid":"https://orcid.org/0000-0001-9392-3071"},"institutions":[{"id":"https://openalex.org/I12315562","display_name":"Texas Tech University","ror":"https://ror.org/0405mnx93","country_code":"US","type":"education","lineage":["https://openalex.org/I12315562"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruiqi Liu","raw_affiliation_strings":["Department of Mathematics and Statistics, Texas Tech University, Lubbock, TX, USA","Texas Tech University, Lubbock, TX, USA"],"raw_orcid":"https://orcid.org/0000-0001-9392-3071","affiliations":[{"raw_affiliation_string":"Department of Mathematics and Statistics, Texas Tech University, Lubbock, TX, USA","institution_ids":["https://openalex.org/I12315562"]},{"raw_affiliation_string":"Texas Tech University, Lubbock, TX, USA","institution_ids":["https://openalex.org/I12315562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063225585","display_name":"Yisha Xiang","orcid":"https://orcid.org/0000-0003-0696-2924"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yisha Xiang","raw_affiliation_strings":["Department of Industrial and Systems Engineering, University of Houston, Houston, TX, USA","University of Houston, Houston, TX, USA"],"raw_orcid":"https://orcid.org/0000-0003-0696-2924","affiliations":[{"raw_affiliation_string":"Department of Industrial and Systems Engineering, University of Houston, Houston, TX, USA","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"University of Houston, Houston, TX, USA","institution_ids":["https://openalex.org/I44461941"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102665106"],"corresponding_institution_ids":["https://openalex.org/I44461941"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04395385,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"71","issue":"7","first_page":"5601","last_page":"5619"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8149999976158142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8149999976158142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.7745000123977661,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5257485508918762},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.48401662707328796},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.43745875358581543},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.35696160793304443},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.17241990566253662}],"concepts":[{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5257485508918762},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.48401662707328796},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.43745875358581543},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35696160793304443},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.17241990566253662},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tit.2025.3569421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2025.3569421","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Information Theory","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2994704086","display_name":null,"funder_award_id":"2305486","funder_id":"https://openalex.org/F4320337391","funder_display_name":"Division of Civil, Mechanical and Manufacturing Innovation"},{"id":"https://openalex.org/G7995056702","display_name":null,"funder_award_id":"2305486","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337391","display_name":"Division of Civil, Mechanical and Manufacturing Innovation","ror":"https://ror.org/028yd4c30"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1460189015","https://openalex.org/W1969727526","https://openalex.org/W1977655452","https://openalex.org/W2009303086","https://openalex.org/W2075672181","https://openalex.org/W2146774335","https://openalex.org/W2619334774","https://openalex.org/W2975927704","https://openalex.org/W3004346979","https://openalex.org/W3007705674","https://openalex.org/W3088353378","https://openalex.org/W3100789280","https://openalex.org/W3124858243","https://openalex.org/W3169728738","https://openalex.org/W3191746168","https://openalex.org/W3207876914","https://openalex.org/W4213251304","https://openalex.org/W4238852866","https://openalex.org/W4366692289","https://openalex.org/W4394627273","https://openalex.org/W6741202896","https://openalex.org/W6758338957","https://openalex.org/W6763948599","https://openalex.org/W6803418194","https://openalex.org/W6840893754","https://openalex.org/W6848322386"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4391375266","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2072657027"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,137],"(RL)":[2],"has":[3],"emerged":[4],"as":[5],"a":[6,36,78,98,148],"key":[7],"approach":[8],"for":[9,25,39,63,102,152,164],"training":[10],"agents":[11],"in":[12,20,30],"complex":[13],"and":[14,27,57,66,127,141,161],"uncertain":[15],"environments.":[16],"Incorporating":[17],"statistical":[18,162],"inference":[19,163],"RL":[21,165],"algorithms":[22],"is":[23],"essential":[24],"understanding":[26],"managing":[28],"uncertainty":[29],"model":[31],"performance.":[32],"This":[33,145],"paper":[34],"introduces":[35],"generalized":[37],"framework":[38,80],"time-varying":[40],"batch-averaged":[41],"Q-learning,":[42,154],"termed":[43],"sample-averaged":[44,90,153],"Q-learning":[45,51],"(SA-QL),":[46],"which":[47],"extends":[48],"traditional":[49],"single-sample":[50],"by":[52],"aggregating":[53],"samples":[54],"of":[55,88,108],"rewards":[56],"next":[58],"states":[59],"to":[60,76],"better":[61],"account":[62],"data":[64],"variability":[65],"uncertainty.":[67],"We":[68],"leverage":[69],"the":[70,85,89,106],"functional":[71],"central":[72],"limit":[73],"theorem":[74],"(FCLT)":[75],"establish":[77],"novel":[79],"that":[81],"provides":[82],"insights":[83,156],"into":[84,157],"asymptotic":[86],"normality":[87],"algorithm":[91],"under":[92],"mild":[93],"conditions.":[94],"Additionally,":[95],"we":[96],"develop":[97],"random":[99],"scaling":[100],"method":[101],"interval":[103,143],"estimation,":[104],"enabling":[105],"construction":[107],"confidence":[109,142],"intervals":[110],"without":[111],"requiring":[112],"extra":[113],"hyperparameters.":[114],"Extensive":[115],"numerical":[116],"experiments":[117],"across":[118],"classic":[119],"stochastic":[120],"OpenAI":[121],"Gym":[122],"environments,":[123],"including":[124],"windy":[125],"gridworld":[126],"slippery":[128],"frozenlake,":[129],"demonstrate":[130],"how":[131],"different":[132],"batch":[133,159],"scheduling":[134,160],"strategies":[135],"affect":[136],"efficiency,":[138],"coverage":[139],"rates,":[140],"widths.":[144],"work":[146],"establishes":[147],"unified":[149],"theoretical":[150],"foundation":[151],"providing":[155],"effective":[158],"algorithms.":[166]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
