{"id":"https://openalex.org/W2107277317","doi":"https://doi.org/10.1145/1015330.1015390","title":"Convergence of synchronous reinforcement learning with linear function approximation","display_name":"Convergence of synchronous reinforcement learning with linear function approximation","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W2107277317","doi":"https://doi.org/10.1145/1015330.1015390","mag":"2107277317"},"language":"en","primary_location":{"id":"doi:10.1145/1015330.1015390","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1015330.1015390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Twenty-first international conference on Machine learning  - ICML '04","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010723602","display_name":"Artur Merke","orcid":null},"institutions":[{"id":"https://openalex.org/I200332995","display_name":"TU Dortmund University","ror":"https://ror.org/01k97gp34","country_code":"DE","type":"education","lineage":["https://openalex.org/I200332995"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Artur Merke","raw_affiliation_strings":["University of Dortmund, Dortmund, Germany"],"affiliations":[{"raw_affiliation_string":"University of Dortmund, Dortmund, Germany","institution_ids":["https://openalex.org/I200332995"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039367044","display_name":"Ralf Schoknecht","orcid":null},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]},{"id":"https://openalex.org/I4210119349","display_name":"Karlsruhe University of Education","ror":"https://ror.org/01t1kq612","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210119349"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ralf Schoknecht","raw_affiliation_strings":["University of Karlsruhe, Karlsruhe, Germany","University of Karlsruhe, Karlsruhe, Germany#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Karlsruhe, Karlsruhe, Germany","institution_ids":["https://openalex.org/I4210119349"]},{"raw_affiliation_string":"University of Karlsruhe, Karlsruhe, Germany#TAB#","institution_ids":["https://openalex.org/I102335020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5010723602"],"corresponding_institution_ids":["https://openalex.org/I200332995"],"apc_list":null,"apc_paid":null,"fwci":1.8485,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.87915749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"75","last_page":"75"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.963699996471405,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterexample","display_name":"Counterexample","score":0.7618879675865173},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7550125122070312},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6665425300598145},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.636502742767334},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5637741088867188},{"id":"https://openalex.org/keywords/compact-convergence","display_name":"Compact convergence","score":0.5047179460525513},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49903249740600586},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.4904575049877167},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.48241958022117615},{"id":"https://openalex.org/keywords/convergence-tests","display_name":"Convergence tests","score":0.45456036925315857},{"id":"https://openalex.org/keywords/uniform-convergence","display_name":"Uniform convergence","score":0.4146375060081482},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.347923219203949},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3258001506328583},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.2300909161567688},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.2128891944885254},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.073099285364151}],"concepts":[{"id":"https://openalex.org/C162838799","wikidata":"https://www.wikidata.org/wiki/Q596077","display_name":"Counterexample","level":2,"score":0.7618879675865173},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7550125122070312},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6665425300598145},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.636502742767334},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5637741088867188},{"id":"https://openalex.org/C5871375","wikidata":"https://www.wikidata.org/wiki/Q1780715","display_name":"Compact convergence","level":4,"score":0.5047179460525513},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49903249740600586},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4904575049877167},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.48241958022117615},{"id":"https://openalex.org/C538505008","wikidata":"https://www.wikidata.org/wiki/Q3979448","display_name":"Convergence tests","level":4,"score":0.45456036925315857},{"id":"https://openalex.org/C157709441","wikidata":"https://www.wikidata.org/wiki/Q1411887","display_name":"Uniform convergence","level":3,"score":0.4146375060081482},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.347923219203949},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3258001506328583},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.2300909161567688},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.2128891944885254},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.073099285364151},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1015330.1015390","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1015330.1015390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Twenty-first international conference on Machine learning  - ICML '04","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.58.7053","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.7053","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.aicml.cs.ualberta.ca/banff04/icml/pages/papers/206.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W598100446","https://openalex.org/W1502893368","https://openalex.org/W1510531757","https://openalex.org/W1560263223","https://openalex.org/W1560592371","https://openalex.org/W1576452626","https://openalex.org/W1646707810","https://openalex.org/W1946956835","https://openalex.org/W2010315317","https://openalex.org/W2100677568","https://openalex.org/W2109330238","https://openalex.org/W2139418546","https://openalex.org/W2155143322"],"related_works":["https://openalex.org/W2034145661","https://openalex.org/W2028595206","https://openalex.org/W2951265198","https://openalex.org/W2995787120","https://openalex.org/W2102183453","https://openalex.org/W2099365622","https://openalex.org/W1581641621","https://openalex.org/W2060809327","https://openalex.org/W2161833392","https://openalex.org/W2044762910"],"abstract_inverted_index":{"Synchronous":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"algorithms":[4,86],"with":[5,87],"linear":[6],"function":[7,88],"approximation":[8],"are":[9,40],"representable":[10],"as":[11],"inhomogeneous":[12,33],"matrix":[13,34],"iterations":[14,35],"of":[15,29,61,68,84,100],"a":[16,58,96,113,128],"special":[17],"form":[18],"(Schoknecht":[19,52],"&":[20,53,122],"Merke,":[21,54],"2003).":[22],"In":[23],"this":[24],"paper":[25],"we":[26,111],"state":[27],"conditions":[28],"convergence":[30,62,69,81,101],"for":[31,102,115,141],"general":[32],"and":[36,43,72,82],"prove":[37,80],"that":[38],"they":[39],"both":[41],"necessary":[42,71],"sufficient.":[44],"This":[45,126],"result":[46,76],"extends":[47],"the":[48,66,74,92,103,117,132,136],"work":[49],"presented":[50],"in":[51],"2003),":[55],"where":[56],"only":[57],"sufficient":[59],"condition":[60,67],"was":[63],"proved.":[64],"As":[65],"is":[70,77],"sufficient,":[73],"new":[75,97],"suitable":[78],"to":[79,94,131],"divergence":[83],"RL":[85,119,138],"approximation.":[89],"We":[90],"use":[91],"theorem":[93],"deduce":[95],"concise":[98],"proof":[99],"synchronous":[104],"residual":[105],"gradient":[106],"algorithm":[107,120,139],"(Baird,":[108],"1995).":[109],"Moreover,":[110],"derive":[112],"counterexample":[114],"which":[116],"uniform":[118,137],"(Merke":[121],"Schoknecht,":[123],"2002)":[124],"diverges.":[125],"yields":[127],"negative":[129],"answer":[130],"open":[133],"question":[134],"if":[135],"converges":[140],"arbitrary":[142],"multiple":[143],"transitions.":[144]},"counts_by_year":[{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
