{"id":"https://openalex.org/W4416078036","doi":"https://doi.org/10.1109/tac.2025.3631342","title":"Does DQN Learn?","display_name":"Does DQN Learn?","publication_year":2025,"publication_date":"2025-11-10","ids":{"openalex":"https://openalex.org/W4416078036","doi":"https://doi.org/10.1109/tac.2025.3631342"},"language":null,"primary_location":{"id":"doi:10.1109/tac.2025.3631342","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3631342","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030430086","display_name":"Aditya Gopalan","orcid":"https://orcid.org/0000-0002-7323-2975"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Aditya Gopalan","raw_affiliation_strings":["Department of Electrical Communication Engineering, Indian Institute of Science (IISc), Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0002-7323-2975","affiliations":[{"raw_affiliation_string":"Department of Electrical Communication Engineering, Indian Institute of Science (IISc), Bengaluru, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047138399","display_name":"Gugan Thoppe","orcid":"https://orcid.org/0000-0001-5066-6589"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Gugan Thoppe","raw_affiliation_strings":["Computer Science and Automation Department, Indian Institute of Science (IISc), Bengaluru, India","Computer Science and Automation Dept., Indian Institute of Science (IISc), Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0001-5066-6589","affiliations":[{"raw_affiliation_string":"Computer Science and Automation Department, Indian Institute of Science (IISc), Bengaluru, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Computer Science and Automation Dept., Indian Institute of Science (IISc), Bengaluru, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030430086"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17349201,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"71","issue":"4","first_page":"2482","last_page":"2495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5515999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5515999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.049400001764297485,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.03480000048875809,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6859999895095825},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5954999923706055},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.5842000246047974},{"id":"https://openalex.org/keywords/fixed-point","display_name":"Fixed point","score":0.5598999857902527},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5102999806404114},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4507000148296356},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.421099990606308},{"id":"https://openalex.org/keywords/differential-inclusion","display_name":"Differential inclusion","score":0.4083999991416931},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.3862999975681305}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6859999895095825},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5954999923706055},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.5842000246047974},{"id":"https://openalex.org/C61445026","wikidata":"https://www.wikidata.org/wiki/Q217608","display_name":"Fixed point","level":2,"score":0.5598999857902527},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5102999806404114},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4778999984264374},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4616999924182892},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.453900009393692},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.421099990606308},{"id":"https://openalex.org/C70834904","wikidata":"https://www.wikidata.org/wiki/Q1054638","display_name":"Differential inclusion","level":2,"score":0.4083999991416931},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C160824197","wikidata":"https://www.wikidata.org/wiki/Q2071054","display_name":"Linear approximation","level":3,"score":0.320499986410141},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C63548660","wikidata":"https://www.wikidata.org/wiki/Q858223","display_name":"Limit point","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3018999993801117},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.30149999260902405},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.27649998664855957},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2025.3631342","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3631342","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"primary":[1],"requirement":[2],"for":[3,93],"any":[4],"reinforcement":[5],"learning":[6],"method":[7],"is":[8,56],"that":[9,14,25,74,80,106,142,160],"it":[10,39],"should":[11],"produce":[12],"policies":[13],"improve":[15],"upon":[16],"the":[17,26,61,84,118,143,187,198],"initial":[18,85],"guess.":[19],"In":[20],"this":[21,35,94],"work,":[22],"we":[23,71,88,140,158],"show":[24,73,159],"widely":[27],"used":[28],"Deep":[29],"Q-Network":[30],"(DQN)":[31],"fails":[32],"to":[33,41,58,60,150,167],"satisfy":[34],"minimal":[36],"criterion\u2014even":[37],"when":[38],"gets":[40],"see":[42],"all":[43],"possible":[44],"states":[45],"and":[46,131,206],"actions":[47],"infinitely":[48],"often":[49,76],"(a":[50],"condition":[51],"under":[52],"which":[53],"tabular":[54],"Q-learning":[55,202],"guaranteed":[57],"converge":[59],"optimal":[62],"Q-value":[63],"function).":[64],"Our":[65,190],"specific":[66],"contributions":[67],"are":[68],"twofold.":[69],"First,":[70],"numerically":[72],"DQN":[75,105,148,184],"returns":[77],"a":[78,90,101,180,193],"policy":[79],"performs":[81],"worse":[82],"than":[83],"one.":[86],"Second,":[87],"offer":[89],"theoretical":[91],"explanation":[92],"phenomenon":[95],"in":[96,111,196],"<italic":[97],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[98,125,208],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">linear":[99],"DQN</i>,":[100],"simplified":[102],"version":[103],"of":[104,113,146,153,201],"uses":[107],"linear":[108,147,173,183],"function":[109,204],"approximation":[110,205],"place":[112],"neural":[114],"networks":[115],"while":[116],"retaining":[117],"other":[119],"key":[120],"components":[121],"such":[122],"as":[123],"<inline-formula":[124,207],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[126,209],"notation=\"LaTeX\">$\\epsilon$</tex-math></inline-formula>-greedy":[127,210],"exploration,":[128],"experience":[129],"replay,":[130],"target":[132],"network.":[133],"Using":[134],"tools":[135],"from":[136],"differential":[137],"inclusion":[138],"theory,":[139],"prove":[141],"limit":[144],"points":[145,152,163],"correspond":[149],"fixed":[151,162],"projected":[154],"Bellman":[155],"operators.":[156],"Crucially,":[157],"these":[161],"need":[164],"not":[165],"relate":[166],"optimal\u2014or":[168],"even":[169],"near-optimal\u2014policies,":[170],"thus":[171],"explaining":[172],"DQN's":[174],"sub-optimal":[175],"behaviors.":[176],"We":[177],"also":[178],"give":[179],"scenario":[181],"where":[182],"always":[185],"identifies":[186],"worst":[188],"policy.":[189],"work":[191],"fills":[192],"longstanding":[194],"gap":[195],"understanding":[197],"convergence":[199],"behaviors":[200],"with":[203],"exploration.":[211]},"counts_by_year":[],"updated_date":"2026-03-31T06:02:25.137627","created_date":"2025-11-10T00:00:00"}
