{"id":"https://openalex.org/W2256989395","doi":"https://doi.org/10.1145/2868723","title":"Actor-Critic Algorithms with Online Feature Adaptation","display_name":"Actor-Critic Algorithms with Online Feature Adaptation","publication_year":2016,"publication_date":"2016-02-09","ids":{"openalex":"https://openalex.org/W2256989395","doi":"https://doi.org/10.1145/2868723","mag":"2256989395"},"language":"en","primary_location":{"id":"doi:10.1145/2868723","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2868723","pdf_url":null,"source":{"id":"https://openalex.org/S38703467","display_name":"ACM Transactions on Modeling and Computer Simulation","issn_l":"1049-3301","issn":["1049-3301","1558-1195"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Modeling and Computer Simulation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052121692","display_name":"K. J. Prabuchandran","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"K. J. Prabuchandran","raw_affiliation_strings":["Indian Institute of Science, Bangalore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038163398","display_name":"Shalabh Bhatnagar","orcid":"https://orcid.org/0000-0001-7644-3914"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shalabh Bhatnagar","raw_affiliation_strings":["Indian Institute of Science, Bangalore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018541798","display_name":"Vivek S. Borkar","orcid":"https://orcid.org/0000-0003-0756-5402"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Vivek S. Borkar","raw_affiliation_strings":["Indian Institute of Technology, Powai, Mumbai"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, Powai, Mumbai","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.00426834,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"26","issue":"4","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simultaneous-perturbation-stochastic-approximation","display_name":"Simultaneous perturbation stochastic approximation","score":0.8599221706390381},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7118667364120483},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.641586184501648},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.633135199546814},{"id":"https://openalex.org/keywords/stochastic-approximation","display_name":"Stochastic approximation","score":0.6142150163650513},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5796750783920288},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5228312611579895},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.5034865736961365},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49163323640823364},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.46739158034324646},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.44494393467903137},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.43488526344299316},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35835000872612},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3391503691673279},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.29043048620224},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.2191345989704132},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.19290021061897278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18771669268608093},{"id":"https://openalex.org/keywords/stochastic-process","display_name":"Stochastic process","score":0.16235226392745972}],"concepts":[{"id":"https://openalex.org/C2779880469","wikidata":"https://www.wikidata.org/wiki/Q17084424","display_name":"Simultaneous perturbation stochastic approximation","level":3,"score":0.8599221706390381},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7118667364120483},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.641586184501648},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.633135199546814},{"id":"https://openalex.org/C55479107","wikidata":"https://www.wikidata.org/wiki/Q97663916","display_name":"Stochastic approximation","level":3,"score":0.6142150163650513},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5796750783920288},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5228312611579895},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.5034865736961365},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49163323640823364},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.46739158034324646},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.44494393467903137},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.43488526344299316},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35835000872612},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3391503691673279},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.29043048620224},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.2191345989704132},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.19290021061897278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18771669268608093},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.16235226392745972},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2868723","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2868723","pdf_url":null,"source":{"id":"https://openalex.org/S38703467","display_name":"ACM Transactions on Modeling and Computer Simulation","issn_l":"1049-3301","issn":["1049-3301","1558-1195"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Modeling and Computer Simulation","raw_type":"journal-article"},{"id":"pmh:oai:eprints.iisc.ac.in:54052","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196533","display_name":"ePrints-IISc. (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:dsapce.library.iitb.ac.in:123456789/21371","is_oa":false,"landing_page_url":"http://doi.org/10.1145/2868723","pdf_url":null,"source":{"id":"https://openalex.org/S4306400899","display_name":"DSpace (IIT Bombay)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162827531","host_organization_name":"Indian Institute of Technology Bombay","host_organization_lineage":["https://openalex.org/I162827531"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7699999809265137,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320313903","display_name":"Tata Consultancy Services","ror":"https://ror.org/01b9n8m42"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W359561291","https://openalex.org/W594357522","https://openalex.org/W1544439040","https://openalex.org/W1583837637","https://openalex.org/W1646707810","https://openalex.org/W1742709735","https://openalex.org/W1804110266","https://openalex.org/W1828381662","https://openalex.org/W1985291828","https://openalex.org/W1993704541","https://openalex.org/W1998172110","https://openalex.org/W2009303086","https://openalex.org/W2011233848","https://openalex.org/W2045512849","https://openalex.org/W2059486486","https://openalex.org/W2091565802","https://openalex.org/W2094364653","https://openalex.org/W2094387729","https://openalex.org/W2098432798","https://openalex.org/W2117138194","https://openalex.org/W2120465407","https://openalex.org/W2121863487","https://openalex.org/W2124289529","https://openalex.org/W2126017757","https://openalex.org/W2138326839","https://openalex.org/W2139418546","https://openalex.org/W2151283311","https://openalex.org/W2153267861","https://openalex.org/W2155027007","https://openalex.org/W2161795906","https://openalex.org/W2162831327","https://openalex.org/W2169125102","https://openalex.org/W2171611360","https://openalex.org/W2173945562","https://openalex.org/W2187770737","https://openalex.org/W2235056388","https://openalex.org/W2401135644","https://openalex.org/W2487739765","https://openalex.org/W2493209382","https://openalex.org/W2548880252","https://openalex.org/W2952907054","https://openalex.org/W3016208987","https://openalex.org/W3027095131","https://openalex.org/W4205293427","https://openalex.org/W4214717370","https://openalex.org/W4243772471","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2038213655","https://openalex.org/W2079907724","https://openalex.org/W2546134129","https://openalex.org/W2169111093","https://openalex.org/W4382936136","https://openalex.org/W2104712676","https://openalex.org/W2118205332","https://openalex.org/W2152670157","https://openalex.org/W2885971921","https://openalex.org/W1973158011"],"abstract_inverted_index":{"We":[0,217],"develop":[1],"two":[2,230],"new":[3],"online":[4],"actor-critic":[5,38,221],"control":[6],"algorithms":[7,20,222],"with":[8],"adaptive":[9],"feature":[10,239],"tuning":[11],"for":[12,23,33],"Markov":[13],"Decision":[14],"Processes":[15],"(MDPs).":[16],"One":[17],"of":[18,63,68,76,80,111,136,161,169,182,188,203],"our":[19,121,220,238],"is":[21,57,96,155],"proposed":[22],"the":[24,30,44,47,54,61,64,69,77,81,85,93,105,112,126,134,150,162,170,180,183,186,189,195,201,204,210],"long-run":[25],"average":[26,190],"cost":[27,35,191,206],"objective,":[28,207],"while":[29,199],"other":[31,94],"works":[32],"discounted":[34,205],"MDPs.":[36],"Our":[37],"architecture":[39],"incorporates":[40],"parameterization":[41],"both":[42],"in":[43,53,109,117,146,185,200],"policy":[45,55,82,196],"and":[46,102,165],"value":[48,78,90,113,163],"function.":[49],"A":[50],"gradient":[51,131,181,197],"search":[52],"parameters":[56],"performed":[58],"to":[59,84,138,148,156,172,178,224],"improve":[60],"performance":[62,234],"actor.":[65],"The":[66,89,107,153],"computation":[67],"aforementioned":[70],"gradient,":[71],"however,":[72,115],"requires":[73],"an":[74],"estimate":[75,179],"function":[79,100,164],"corresponding":[83],"current":[86],"actor":[87,171],"parameter.":[88],"function,":[91,114],"on":[92,133,229],"hand,":[95],"approximated":[97],"using":[98],"linear":[99],"approximation":[101,110,160,214],"obtained":[103],"from":[104,237],"critic.":[106],"error":[108,144],"results":[116],"suboptimal":[118],"policies.":[119,175,227],"In":[120,176],"article,":[122],"we":[123,193,208],"also":[124],"update":[125],"features":[127,137],"by":[128],"performing":[129],"a":[130,140,158],"descent":[132],"Grassmannian":[135],"minimize":[139],"mean":[141],"square":[142],"Bellman":[143],"objective":[145,184],"order":[147,177],"find":[149],"best":[151],"features.":[152],"aim":[154],"obtain":[157],"good":[159],"thereby":[166],"ensure":[167],"convergence":[168],"locally":[173,225],"optimal":[174,226],"case":[187,202],"criterion,":[192],"utilize":[194,209],"theorem,":[198],"simultaneous":[211],"perturbation":[212],"stochastic":[213],"(SPSA)":[215],"scheme.":[216,241],"prove":[218],"that":[219],"converge":[223],"Experiments":[228],"different":[231],"settings":[232],"show":[233],"improvements":[235],"resulting":[236],"adaptation":[240]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}