{"id":"https://openalex.org/W2146957157","doi":"https://doi.org/10.1007/s10994-011-5251-x","title":"Characterizing reinforcement learning methods through parameterized learning problems","display_name":"Characterizing reinforcement learning methods through parameterized learning problems","publication_year":2011,"publication_date":"2011-06-03","ids":{"openalex":"https://openalex.org/W2146957157","doi":"https://doi.org/10.1007/s10994-011-5251-x","mag":"2146957157"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-011-5251-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-011-5251-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-011-5251-x.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-011-5251-x.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038200034","display_name":"Shivaram Kalyanakrishnan","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shivaram Kalyanakrishnan","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Austin, 1616 Guadalupe St Suite 2.408, Austin, TX, 78701, USA","Department of Computer Science, The University of Texas at Austin, Austin, USA 78701#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Austin, 1616 Guadalupe St Suite 2.408, Austin, TX, 78701, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Austin, Austin, USA 78701#TAB#","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001594330","display_name":"Peter Stone","orcid":"https://orcid.org/0000-0002-6795-420X"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Stone","raw_affiliation_strings":["Department of Computer Science, The University of Texas at Austin, 1616 Guadalupe St Suite 2.408, Austin, TX, 78701, USA","Department of Computer Science, The University of Texas at Austin, Austin, USA 78701#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Austin, 1616 Guadalupe St Suite 2.408, Austin, TX, 78701, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Computer Science, The University of Texas at Austin, Austin, USA 78701#TAB#","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038200034"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":null,"fwci":4.8234,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.95145913,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"84","issue":"1-2","first_page":"205","last_page":"247"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.871799647808075},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.7490153312683105},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6859184503555298},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.6597449779510498},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5518136024475098},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5264645218849182},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5020499229431152},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49275973439216614},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.49135056138038635},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.4654916524887085},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.45119473338127136},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.4474211633205414},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.42641302943229675},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4199267029762268},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19833561778068542},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19175094366073608},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.1589314043521881},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1554568111896515}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.871799647808075},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.7490153312683105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6859184503555298},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.6597449779510498},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5518136024475098},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5264645218849182},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5020499229431152},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49275973439216614},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.49135056138038635},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.4654916524887085},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.45119473338127136},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.4474211633205414},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.42641302943229675},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4199267029762268},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19833561778068542},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19175094366073608},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.1589314043521881},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1554568111896515},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10994-011-5251-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-011-5251-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-011-5251-x.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.226.2972","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.226.2972","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.utexas.edu/%7Eshivaram/papers/ks_ml_2011.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1007/s10994-011-5251-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-011-5251-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-011-5251-x.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2470857384","display_name":"RI: Small: Efficient Reinforcement Learning for Generic Large-Scale Tasks","funder_award_id":"0917122","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3557999448","display_name":null,"funder_award_id":"IIS-0917122","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4935987103","display_name":"CSR--PDOS: Autonomic Systems: Integrating Machine Learning with Computer Systems","funder_award_id":"0615104","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8632988412","display_name":null,"funder_award_id":"DTFH61-07-H-00030","funder_id":"https://openalex.org/F4320332393","funder_display_name":"Federal Highway Administration"},{"id":"https://openalex.org/G8856069451","display_name":null,"funder_award_id":"N00014-09-1-0658","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310620","display_name":"University of Texas at Austin","ror":"https://ror.org/00hj54h04"},{"id":"https://openalex.org/F4320332393","display_name":"Federal Highway Administration","ror":"https://ror.org/0473rr271"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2146957157.pdf","grobid_xml":"https://content.openalex.org/works/W2146957157.grobid-xml"},"referenced_works_count":173,"referenced_works":["https://openalex.org/W6242441","https://openalex.org/W13294968","https://openalex.org/W14466878","https://openalex.org/W51508254","https://openalex.org/W91593682","https://openalex.org/W132137529","https://openalex.org/W183068496","https://openalex.org/W203276351","https://openalex.org/W225768461","https://openalex.org/W252213970","https://openalex.org/W583627169","https://openalex.org/W609661901","https://openalex.org/W1262227701","https://openalex.org/W1490019423","https://openalex.org/W1491129446","https://openalex.org/W1491843047","https://openalex.org/W1496062094","https://openalex.org/W1499371387","https://openalex.org/W1505937442","https://openalex.org/W1517322531","https://openalex.org/W1519451279","https://openalex.org/W1541084404","https://openalex.org/W1550698229","https://openalex.org/W1552830313","https://openalex.org/W1557517019","https://openalex.org/W1563127062","https://openalex.org/W1565759249","https://openalex.org/W1570690983","https://openalex.org/W1576452626","https://openalex.org/W1582783138","https://openalex.org/W1583380718","https://openalex.org/W1585546214","https://openalex.org/W1585603966","https://openalex.org/W1600046456","https://openalex.org/W1657674574","https://openalex.org/W1741816146","https://openalex.org/W1778554682","https://openalex.org/W1848006316","https://openalex.org/W1914583973","https://openalex.org/W1949804828","https://openalex.org/W1965227651","https://openalex.org/W1966280301","https://openalex.org/W1979601782","https://openalex.org/W2009303086","https://openalex.org/W2009708807","https://openalex.org/W2011418219","https://openalex.org/W2028335093","https://openalex.org/W2045491133","https://openalex.org/W2052117683","https://openalex.org/W2059654640","https://openalex.org/W2072054128","https://openalex.org/W2075268401","https://openalex.org/W2082691056","https://openalex.org/W2092353910","https://openalex.org/W2099873296","https://openalex.org/W2100563667","https://openalex.org/W2100677568","https://openalex.org/W2100752967","https://openalex.org/W2100787464","https://openalex.org/W2102863375","https://openalex.org/W2103626435","https://openalex.org/W2104228245","https://openalex.org/W2104578628","https://openalex.org/W2104641222","https://openalex.org/W2105336508","https://openalex.org/W2108734173","https://openalex.org/W2109008048","https://openalex.org/W2110486110","https://openalex.org/W2110962519","https://openalex.org/W2110972482","https://openalex.org/W2112076978","https://openalex.org/W2112264645","https://openalex.org/W2112899086","https://openalex.org/W2113913482","https://openalex.org/W2116339921","https://openalex.org/W2117341272","https://openalex.org/W2118318536","https://openalex.org/W2121863487","https://openalex.org/W2122659384","https://openalex.org/W2123663688","https://openalex.org/W2123859855","https://openalex.org/W2124175081","https://openalex.org/W2125612430","https://openalex.org/W2128302979","https://openalex.org/W2128630570","https://openalex.org/W2129018774","https://openalex.org/W2129113961","https://openalex.org/W2129240174","https://openalex.org/W2129620190","https://openalex.org/W2129670787","https://openalex.org/W2130801532","https://openalex.org/W2131660762","https://openalex.org/W2132083787","https://openalex.org/W2132414174","https://openalex.org/W2132713246","https://openalex.org/W2139418546","https://openalex.org/W2140219596","https://openalex.org/W2140671219","https://openalex.org/W2141559645","https://openalex.org/W2144366468","https://openalex.org/W2144744117","https://openalex.org/W2145680191","https://openalex.org/W2146917784","https://openalex.org/W2147147005","https://openalex.org/W2147148915","https://openalex.org/W2148067905","https://openalex.org/W2150304264","https://openalex.org/W2150339816","https://openalex.org/W2151554678","https://openalex.org/W2151661095","https://openalex.org/W2152761983","https://openalex.org/W2153192722","https://openalex.org/W2154225404","https://openalex.org/W2155027007","https://openalex.org/W2157963512","https://openalex.org/W2158823144","https://openalex.org/W2160308170","https://openalex.org/W2160519132","https://openalex.org/W2164056559","https://openalex.org/W2167390657","https://openalex.org/W2169022337","https://openalex.org/W2172141376","https://openalex.org/W2309755354","https://openalex.org/W2336687883","https://openalex.org/W2341171179","https://openalex.org/W2489939061","https://openalex.org/W2551869344","https://openalex.org/W2582998992","https://openalex.org/W2595408825","https://openalex.org/W2602256192","https://openalex.org/W2604799680","https://openalex.org/W2606507968","https://openalex.org/W2610184409","https://openalex.org/W2610686804","https://openalex.org/W2613433911","https://openalex.org/W2613680419","https://openalex.org/W2616052791","https://openalex.org/W2620087753","https://openalex.org/W2621300285","https://openalex.org/W2763384692","https://openalex.org/W2911964244","https://openalex.org/W2914775474","https://openalex.org/W3011120880","https://openalex.org/W3103182070","https://openalex.org/W3139377883","https://openalex.org/W3147501999","https://openalex.org/W4214717370","https://openalex.org/W4230992064","https://openalex.org/W4240845243","https://openalex.org/W4242606736","https://openalex.org/W4245296547","https://openalex.org/W4246329541","https://openalex.org/W4285719527","https://openalex.org/W4307347247","https://openalex.org/W6634528131","https://openalex.org/W6635049011","https://openalex.org/W6638026421","https://openalex.org/W6675698164","https://openalex.org/W6676769703","https://openalex.org/W6677834374","https://openalex.org/W6677916085","https://openalex.org/W6679257226","https://openalex.org/W6679412632","https://openalex.org/W6680870558","https://openalex.org/W6682567211","https://openalex.org/W6712574374","https://openalex.org/W6737210826","https://openalex.org/W6775686901","https://openalex.org/W6780394890","https://openalex.org/W6792155000","https://openalex.org/W6834815581","https://openalex.org/W6996005239","https://openalex.org/W7075666287"],"related_works":["https://openalex.org/W2152670157","https://openalex.org/W2386410636","https://openalex.org/W176737593","https://openalex.org/W4308702637","https://openalex.org/W2100100236","https://openalex.org/W2808418668","https://openalex.org/W61119710","https://openalex.org/W4256087190","https://openalex.org/W4240668504","https://openalex.org/W2011233848"],"abstract_inverted_index":null,"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
