{"id":"https://openalex.org/W2150339816","doi":"https://doi.org/10.1023/a:1007678930559","title":"Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms","display_name":"Convergence Results for Single-Step On-Policy Reinforcement-Learning Algorithms","publication_year":2000,"publication_date":"2000-03-01","ids":{"openalex":"https://openalex.org/W2150339816","doi":"https://doi.org/10.1023/a:1007678930559","mag":"2150339816"},"language":"en","primary_location":{"id":"doi:10.1023/a:1007678930559","is_oa":true,"landing_page_url":"https://doi.org/10.1023/a:1007678930559","pdf_url":"https://link.springer.com/content/pdf/10.1023/A:1007678930559.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://link.springer.com/content/pdf/10.1023/A:1007678930559.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065366930","display_name":"Satinder Singh","orcid":"https://orcid.org/0000-0002-2736-7641"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Satinder Singh","raw_affiliation_strings":["AT&T Labs-Research, 180 Park Avenue, Florham Park, NJ, 07932, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AT&T Labs-Research, 180 Park Avenue, Florham Park, NJ, 07932, USA","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048915657","display_name":"Tommi Jaakkola","orcid":"https://orcid.org/0000-0002-2199-0379"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tommi Jaakkola","raw_affiliation_strings":["Department of Computer Science, Massachusetts Institute of Technology, Cambridge, MA, 02139, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Massachusetts Institute of Technology, Cambridge, MA, 02139, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009722403","display_name":"Michael L. Littman","orcid":"https://orcid.org/0000-0002-5596-1840"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael L. Littman","raw_affiliation_strings":["Department of Computer Science, Duke University, Durham, NC, 27708-0129, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Duke University, Durham, NC, 27708-0129, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069856068","display_name":"Csaba Szepesv\u00e1ri","orcid":"https://orcid.org/0000-0002-9286-2892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Csaba Szepesv\u00e1ri","raw_affiliation_strings":["Mindmaker Ltd., Konkoly Thege M. u. 29-33, Budapest, 1121, Hungary"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mindmaker Ltd., Konkoly Thege M. u. 29-33, Budapest, 1121, Hungary","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":22.3008,"has_fulltext":true,"cited_by_count":625,"citation_normalized_percentile":{"value":0.99499518,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"38","issue":"3","first_page":"287","last_page":"308"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8362650275230408},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7256089448928833},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6827136278152466},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5112094879150391},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.49874210357666016},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.47469061613082886},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4347797632217407},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.386441171169281},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22365134954452515},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19628959894180298},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.060238927602767944}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8362650275230408},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7256089448928833},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6827136278152466},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5112094879150391},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49874210357666016},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.47469061613082886},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4347797632217407},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.386441171169281},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22365134954452515},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19628959894180298},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.060238927602767944},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1023/a:1007678930559","is_oa":true,"landing_page_url":"https://doi.org/10.1023/a:1007678930559","pdf_url":"https://link.springer.com/content/pdf/10.1023/A:1007678930559.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1023/a:1007678930559","is_oa":true,"landing_page_url":"https://doi.org/10.1023/a:1007678930559","pdf_url":"https://link.springer.com/content/pdf/10.1023/A:1007678930559.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7318137282","display_name":"CAREER:  Planning Under Uncertainty in Large Domains","funder_award_id":"9702576","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8133388860","display_name":"Exploiting Structure in Reinforcement Learning Problems","funder_award_id":"9711753","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321994","display_name":"Hungarian Scientific Research Fund","ror":"https://ror.org/00v349e63"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2150339816.pdf","grobid_xml":"https://content.openalex.org/works/W2150339816.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W58217484","https://openalex.org/W65193931","https://openalex.org/W132264163","https://openalex.org/W1515851193","https://openalex.org/W1515891729","https://openalex.org/W1535136082","https://openalex.org/W1548889916","https://openalex.org/W1626977535","https://openalex.org/W1826343992","https://openalex.org/W1965227651","https://openalex.org/W2006650111","https://openalex.org/W2009533501","https://openalex.org/W2039439610","https://openalex.org/W2064401136","https://openalex.org/W2080631849","https://openalex.org/W2098432798","https://openalex.org/W2100677568","https://openalex.org/W2107726111","https://openalex.org/W2113913482","https://openalex.org/W2117341272","https://openalex.org/W2118219918","https://openalex.org/W2119567691","https://openalex.org/W2124175081","https://openalex.org/W2125074935","https://openalex.org/W2131600418","https://openalex.org/W2139418546","https://openalex.org/W2147750403","https://openalex.org/W2148173263","https://openalex.org/W2158091072","https://openalex.org/W2165131254","https://openalex.org/W2169022337","https://openalex.org/W2334782222","https://openalex.org/W2341171179","https://openalex.org/W2545350044","https://openalex.org/W2787259794","https://openalex.org/W3011120880","https://openalex.org/W3139377883","https://openalex.org/W4212774754","https://openalex.org/W4214717370","https://openalex.org/W4233061323","https://openalex.org/W4233696721","https://openalex.org/W4236243725","https://openalex.org/W4243421009","https://openalex.org/W4245724425","https://openalex.org/W4248400504","https://openalex.org/W4253978526","https://openalex.org/W4298023569","https://openalex.org/W4362203700","https://openalex.org/W6602318556","https://openalex.org/W6602690695","https://openalex.org/W6631000544","https://openalex.org/W6674995601","https://openalex.org/W6704298589","https://openalex.org/W6792155000","https://openalex.org/W6817406072","https://openalex.org/W6996005239"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2024136090","https://openalex.org/W2964765435"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":31},{"year":2024,"cited_by_count":27},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":46},{"year":2020,"cited_by_count":52},{"year":2019,"cited_by_count":50},{"year":2018,"cited_by_count":36},{"year":2017,"cited_by_count":22},{"year":2016,"cited_by_count":17},{"year":2015,"cited_by_count":24},{"year":2014,"cited_by_count":31},{"year":2013,"cited_by_count":21},{"year":2012,"cited_by_count":25}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2016-06-24T00:00:00"}
