{"id":"https://openalex.org/W1993704541","doi":"https://doi.org/10.1109/jstsp.2013.2255022","title":"Feature Search in the Grassmanian in Online Reinforcement Learning","display_name":"Feature Search in the Grassmanian in Online Reinforcement Learning","publication_year":2013,"publication_date":"2013-03-27","ids":{"openalex":"https://openalex.org/W1993704541","doi":"https://doi.org/10.1109/jstsp.2013.2255022","mag":"1993704541"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2013.2255022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2013.2255022","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038163398","display_name":"Shalabh Bhatnagar","orcid":"https://orcid.org/0000-0001-7644-3914"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Shalabh Bhatnagar","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018541798","display_name":"Vivek S. Borkar","orcid":"https://orcid.org/0000-0003-0756-5402"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vivek S. Borkar","raw_affiliation_strings":["Department of Electrical Engineering, Indian Institute of Technology, Powai, Mumbai, India","Dept. of Electr. Eng., Indian Inst. of Technol., Mumbai, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Indian Institute of Technology, Powai, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]},{"raw_affiliation_string":"Dept. of Electr. Eng., Indian Inst. of Technol., Mumbai, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052121692","display_name":"K. J. Prabuchandran","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prabuchandran K. J.","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Dept of Computer Science & Automation, Indian Institute of Science, Bangalore, India#TAB#","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038163398"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":1.9775,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88106851,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"7","issue":"5","first_page":"746","last_page":"758"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8082473278045654},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6456729769706726},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5752352476119995},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5649265646934509},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.563184380531311},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.4970114529132843},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4731755554676056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46524015069007874},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.45470768213272095},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.45193374156951904},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3949892520904541},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3681532144546509}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8082473278045654},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6456729769706726},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5752352476119995},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5649265646934509},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.563184380531311},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.4970114529132843},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4731755554676056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46524015069007874},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.45470768213272095},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.45193374156951904},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3949892520904541},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3681532144546509},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/jstsp.2013.2255022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2013.2255022","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:eprints.iisc.ac.in:47567","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196533","display_name":"ePrints-IISc. (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:dsapce.library.iitb.ac.in:100/15984","is_oa":false,"landing_page_url":"http://doi.org/10.1109/JSTSP.2013.2255022","pdf_url":null,"source":{"id":"https://openalex.org/S4306400899","display_name":"DSpace (IIT Bombay)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162827531","host_organization_name":"Indian Institute of Technology Bombay","host_organization_lineage":["https://openalex.org/I162827531"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:dspace.library.iitb.ac.in:100/15984","is_oa":false,"landing_page_url":"http://dspace.library.iitb.ac.in/jspui/handle/100/15984","pdf_url":null,"source":{"id":"https://openalex.org/S4306400899","display_name":"DSpace (IIT Bombay)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162827531","host_organization_name":"Indian Institute of Technology Bombay","host_organization_lineage":["https://openalex.org/I162827531"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W359561291","https://openalex.org/W594357522","https://openalex.org/W1568229137","https://openalex.org/W1576452626","https://openalex.org/W1597303641","https://openalex.org/W1646707810","https://openalex.org/W1804110266","https://openalex.org/W1828381662","https://openalex.org/W1998172110","https://openalex.org/W2011233848","https://openalex.org/W2019172585","https://openalex.org/W2045512849","https://openalex.org/W2071983464","https://openalex.org/W2075268401","https://openalex.org/W2080631849","https://openalex.org/W2094364653","https://openalex.org/W2098432798","https://openalex.org/W2100677568","https://openalex.org/W2103198983","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2125852847","https://openalex.org/W2134042548","https://openalex.org/W2138326839","https://openalex.org/W2139418546","https://openalex.org/W2151283311","https://openalex.org/W2153267861","https://openalex.org/W2169647243","https://openalex.org/W2171611360","https://openalex.org/W2173945562","https://openalex.org/W2187770737","https://openalex.org/W2235056388","https://openalex.org/W2334782222","https://openalex.org/W2341171179","https://openalex.org/W2493209382","https://openalex.org/W2531891978","https://openalex.org/W2548880252","https://openalex.org/W2962834831","https://openalex.org/W3041202696","https://openalex.org/W4205293427","https://openalex.org/W4205326910","https://openalex.org/W4214717370","https://openalex.org/W4243772471","https://openalex.org/W4302033506","https://openalex.org/W6682375106","https://openalex.org/W6686922269","https://openalex.org/W7065010408"],"related_works":["https://openalex.org/W2145363145","https://openalex.org/W2386410636","https://openalex.org/W2341346307","https://openalex.org/W2025663273","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W3099153698","https://openalex.org/W2768629321","https://openalex.org/W2130711276","https://openalex.org/W3038962357"],"abstract_inverted_index":{"We":[0,63,72],"consider":[1],"the":[2,6,23,40,58,93,98],"problem":[3],"of":[4,61,67,69,92],"finding":[5],"best":[7],"features":[8],"for":[9,97],"value":[10],"function":[11],"approximation":[12],"in":[13,39,57,90],"reinforcement":[14],"learning":[15,89],"and":[16],"develop":[17],"an":[18],"online":[19],"algorithm":[20,35,78,84],"to":[21],"optimize":[22],"mean":[24],"square":[25],"Bellman":[26],"error":[27],"objective.":[28],"For":[29],"any":[30],"given":[31],"feature":[32],"value,":[33],"our":[34,70,77],"performs":[36,54],"gradient":[37,46,55,95],"search":[38,56],"parameter":[41],"space":[42],"via":[43],"a":[44,50,65,82],"residual":[45,94],"scheme":[47,96],"and,":[48],"on":[49],"slower":[51],"timescale,":[52],"also":[53],"Grassman":[59],"manifold":[60],"features.":[62],"present":[64],"proof":[66],"convergence":[68],"algorithm.":[71],"show":[73],"empirical":[74],"results":[75],"using":[76],"as":[79,81],"well":[80],"similar":[83],"that":[85],"uses":[86],"temporal":[87],"difference":[88],"place":[91],"faster":[99],"timescale":[100],"updates.":[101]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
