{"id":"https://openalex.org/W2144960180","doi":"https://doi.org/10.1109/acc.2007.4282586","title":"Solving MDPs using Two-timescale Simulated Annealing with Multiplicative Weights","display_name":"Solving MDPs using Two-timescale Simulated Annealing with Multiplicative Weights","publication_year":2007,"publication_date":"2007-07-01","ids":{"openalex":"https://openalex.org/W2144960180","doi":"https://doi.org/10.1109/acc.2007.4282586","mag":"2144960180"},"language":"en","primary_location":{"id":"doi:10.1109/acc.2007.4282586","is_oa":false,"landing_page_url":"https://doi.org/10.1109/acc.2007.4282586","pdf_url":null,"source":{"id":"https://openalex.org/S4210168941","display_name":"Proceedings of the ... American Control Conference/Proceedings of the American Control Conference","issn_l":"0743-1619","issn":["0743-1619","2378-5861"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 American Control Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057952216","display_name":"Mohammed Shahid Abdulla","orcid":"https://orcid.org/0000-0002-0464-0617"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Mohammed Shahid Abdulla","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","Indian Institute of Science Bangalore"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Indian Institute of Science Bangalore","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038163398","display_name":"Shalabh Bhatnagar","orcid":"https://orcid.org/0000-0001-7644-3914"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shalabh Bhatnagar","raw_affiliation_strings":["Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","Indian Institute of Science Bangalore"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Indian Institute of Science Bangalore","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5057952216"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25008553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"i","issue":null,"first_page":"2428","last_page":"2433"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recursion","display_name":"Recursion (computer science)","score":0.7790014743804932},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7550315856933594},{"id":"https://openalex.org/keywords/multiplicative-function","display_name":"Multiplicative function","score":0.7284912467002869},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.6361297369003296},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6259665489196777},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5502760410308838},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5007271766662598},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.49034643173217773},{"id":"https://openalex.org/keywords/adaptive-simulated-annealing","display_name":"Adaptive simulated annealing","score":0.4681693911552429},{"id":"https://openalex.org/keywords/exponential-function","display_name":"Exponential function","score":0.45950645208358765},{"id":"https://openalex.org/keywords/simulated-annealing","display_name":"Simulated annealing","score":0.44878458976745605},{"id":"https://openalex.org/keywords/theory-of-computation","display_name":"Theory of computation","score":0.4101962149143219},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.392398476600647},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.35861119627952576},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2357112467288971}],"concepts":[{"id":"https://openalex.org/C168773036","wikidata":"https://www.wikidata.org/wiki/Q264164","display_name":"Recursion (computer science)","level":2,"score":0.7790014743804932},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7550315856933594},{"id":"https://openalex.org/C42747912","wikidata":"https://www.wikidata.org/wiki/Q1048447","display_name":"Multiplicative function","level":2,"score":0.7284912467002869},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.6361297369003296},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6259665489196777},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5502760410308838},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5007271766662598},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49034643173217773},{"id":"https://openalex.org/C46714192","wikidata":"https://www.wikidata.org/wiki/Q4680763","display_name":"Adaptive simulated annealing","level":3,"score":0.4681693911552429},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.45950645208358765},{"id":"https://openalex.org/C126980161","wikidata":"https://www.wikidata.org/wiki/Q863783","display_name":"Simulated annealing","level":2,"score":0.44878458976745605},{"id":"https://openalex.org/C24858836","wikidata":"https://www.wikidata.org/wiki/Q844718","display_name":"Theory of computation","level":2,"score":0.4101962149143219},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.392398476600647},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.35861119627952576},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2357112467288971},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/acc.2007.4282586","is_oa":false,"landing_page_url":"https://doi.org/10.1109/acc.2007.4282586","pdf_url":null,"source":{"id":"https://openalex.org/S4210168941","display_name":"Proceedings of the ... American Control Conference/Proceedings of the American Control Conference","issn_l":"0743-1619","issn":["0743-1619","2378-5861"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 American Control Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.iisc.ac.in:27417","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196309","display_name":"NOT FOUND REPOSITORY (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7699999809265137}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320719","display_name":"Department of Science and Technology, Ministry of Science and Technology, India","ror":"https://ror.org/0101xrq71"},{"id":"https://openalex.org/F4320325165","display_name":"Schweizerische Akademie der Medizinischen Wissenschaften","ror":"https://ror.org/011cav305"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1576452626","https://openalex.org/W1587487627","https://openalex.org/W2009303086","https://openalex.org/W2061769118","https://openalex.org/W2063123975","https://openalex.org/W2082261506","https://openalex.org/W2098432798","https://openalex.org/W2156737235","https://openalex.org/W2165817674","https://openalex.org/W2322524800","https://openalex.org/W2595250839","https://openalex.org/W2898646860","https://openalex.org/W4205326910","https://openalex.org/W4285719527","https://openalex.org/W6635230301","https://openalex.org/W6666110301","https://openalex.org/W6734616217"],"related_works":["https://openalex.org/W2083644882","https://openalex.org/W4241327272","https://openalex.org/W2120406836","https://openalex.org/W1996214847","https://openalex.org/W2997055691","https://openalex.org/W2903299703","https://openalex.org/W4211224558","https://openalex.org/W4385342861","https://openalex.org/W1672382258","https://openalex.org/W2117282672"],"abstract_inverted_index":{"We":[0,107],"develop":[1],"extensions":[2,25],"of":[3,16,18,33,43,79],"the":[4,34,39,58,61,69,77,85,96],"simulated":[5,52],"annealing":[6],"with":[7],"multiplicative":[8],"weights":[9],"(SAMW)":[10],"algorithm":[11,49,59],"that":[12,50],"proposed":[13],"a":[14,88,109],"method":[15],"solution":[17],"finite-horizon":[19],"Markov":[20],"decision":[21],"processes":[22],"(FH-MDPs).":[23],"The":[24],"developed":[26],"are":[27],"in":[28,38,76,127],"three":[29],"directions:":[30],"a)":[31,67],"Use":[32],"dynamic":[35],"programming":[36],"principle":[37],"policy":[40,92,103],"update":[41],"step":[42],"SAMW":[44],"b)":[45],"A":[46],"two-timescale":[47],"actor-critic":[48],"uses":[51],"transitions":[53],"alone,":[54],"and":[55,115,124],"c)":[56],"Extending":[57],"to":[60,74],"infinite-horizon":[62],"discounted-reward":[63],"scenario.":[64],"In":[65],"particular,":[66],"reduces":[68],"storage":[70],"required":[71],"from":[72],"exponential":[73],"linear":[75],"number":[78],"actions":[80],"per":[81],"stage-state":[82],"pair.":[83],"On":[84],"faster":[86],"timescale,":[87],"'critic'":[89],"recursion":[90,101],"performs":[91,102],"evaluation":[93],"while":[94],"on":[95,119],"slower":[97],"timescale":[98],"an":[99],"'actor'":[100],"improvement":[104],"using":[105],"SAMW.":[106],"give":[108],"proof":[110],"outlining":[111],"convergence":[112],"w.p.":[113],"1":[114],"show":[116],"experimental":[117],"results":[118],"two":[120],"settings:":[121],"semiconductor":[122],"fabrication":[123],"flow":[125],"control":[126],"communication":[128],"networks.":[129]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
