{"id":"https://openalex.org/W3119186746","doi":"https://doi.org/10.1109/jas.2021.1003814","title":"Multiagent Reinforcement Learning: Rollout and Policy Iteration","display_name":"Multiagent Reinforcement Learning: Rollout and Policy Iteration","publication_year":2021,"publication_date":"2021-01-09","ids":{"openalex":"https://openalex.org/W3119186746","doi":"https://doi.org/10.1109/jas.2021.1003814","mag":"3119186746"},"language":"en","primary_location":{"id":"doi:10.1109/jas.2021.1003814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jas.2021.1003814","pdf_url":null,"source":{"id":"https://openalex.org/S2484288132","display_name":"IEEE/CAA Journal of Automatica Sinica","issn_l":"2329-9266","issn":["2329-9266","2329-9274"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/CAA Journal of Automatica Sinica","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039911471","display_name":"Dimitri P. Bertsekas","orcid":"https://orcid.org/0000-0001-6909-7208"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]},{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dimitri Bertsekas","raw_affiliation_strings":["Arizona State University (ASU), Tempe, AZ, USA","Massachusetts Institute of Technology (MIT), Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Arizona State University (ASU), Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"Massachusetts Institute of Technology (MIT), Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5039911471"],"corresponding_institution_ids":["https://openalex.org/I55732556","https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":10.9172,"has_fulltext":false,"cited_by_count":101,"citation_normalized_percentile":{"value":0.9865654,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"8","issue":"2","first_page":"249","last_page":"272"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11674","display_name":"Sports Analytics and Performance","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6858057975769043},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6638649702072144},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6371617317199707},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5922995805740356},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.511879563331604},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4725603461265564},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.4576593339443207},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41657596826553345},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23162305355072021},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2286260724067688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1711057424545288}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6858057975769043},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638649702072144},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6371617317199707},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5922995805740356},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.511879563331604},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4725603461265564},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.4576593339443207},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41657596826553345},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23162305355072021},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2286260724067688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1711057424545288},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jas.2021.1003814","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jas.2021.1003814","pdf_url":null,"source":{"id":"https://openalex.org/S2484288132","display_name":"IEEE/CAA Journal of Automatica Sinica","issn_l":"2329-9266","issn":["2329-9266","2329-9274"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/CAA Journal of Automatica Sinica","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":106,"referenced_works":["https://openalex.org/W206679605","https://openalex.org/W613360614","https://openalex.org/W1502991072","https://openalex.org/W1542941925","https://openalex.org/W1576452626","https://openalex.org/W1596538644","https://openalex.org/W1603765807","https://openalex.org/W1788292158","https://openalex.org/W1918371733","https://openalex.org/W1986014385","https://openalex.org/W1991888757","https://openalex.org/W1999874108","https://openalex.org/W2027855416","https://openalex.org/W2050084431","https://openalex.org/W2065118686","https://openalex.org/W2069525910","https://openalex.org/W2079394378","https://openalex.org/W2081797037","https://openalex.org/W2091048464","https://openalex.org/W2092025421","https://openalex.org/W2092792265","https://openalex.org/W2096145798","https://openalex.org/W2097498347","https://openalex.org/W2098432798","https://openalex.org/W2099618002","https://openalex.org/W2106471678","https://openalex.org/W2107544712","https://openalex.org/W2115118348","https://openalex.org/W2116442740","https://openalex.org/W2119380668","https://openalex.org/W2120391138","https://openalex.org/W2121863487","https://openalex.org/W2124477018","https://openalex.org/W2131490088","https://openalex.org/W2133102604","https://openalex.org/W2134289401","https://openalex.org/W2135997697","https://openalex.org/W2145339207","https://openalex.org/W2147492008","https://openalex.org/W2147750403","https://openalex.org/W2148864095","https://openalex.org/W2154533441","https://openalex.org/W2155143729","https://openalex.org/W2156534877","https://openalex.org/W2164894752","https://openalex.org/W2165060096","https://openalex.org/W2167509584","https://openalex.org/W2395575420","https://openalex.org/W2408811396","https://openalex.org/W2563857824","https://openalex.org/W2575731723","https://openalex.org/W2581566809","https://openalex.org/W2615688110","https://openalex.org/W2740377041","https://openalex.org/W2747213132","https://openalex.org/W2765795464","https://openalex.org/W2768629321","https://openalex.org/W2772709170","https://openalex.org/W2788115019","https://openalex.org/W2898567809","https://openalex.org/W2908261578","https://openalex.org/W2941629501","https://openalex.org/W2950989964","https://openalex.org/W2963000099","https://openalex.org/W2963600139","https://openalex.org/W2963747324","https://openalex.org/W2970129473","https://openalex.org/W2978236943","https://openalex.org/W2981038142","https://openalex.org/W2991046523","https://openalex.org/W2994625392","https://openalex.org/W2994779591","https://openalex.org/W3006146203","https://openalex.org/W3006645117","https://openalex.org/W3009835149","https://openalex.org/W3010758700","https://openalex.org/W3012122206","https://openalex.org/W3015899324","https://openalex.org/W3027624373","https://openalex.org/W3031673669","https://openalex.org/W3099134564","https://openalex.org/W3099235411","https://openalex.org/W3102905931","https://openalex.org/W3107686539","https://openalex.org/W3210069205","https://openalex.org/W4214717370","https://openalex.org/W4233197277","https://openalex.org/W4233696721","https://openalex.org/W4254529637","https://openalex.org/W4254547512","https://openalex.org/W4287601512","https://openalex.org/W4293489810","https://openalex.org/W4295402504","https://openalex.org/W4297627396","https://openalex.org/W4299802797","https://openalex.org/W4300773225","https://openalex.org/W4306179383","https://openalex.org/W6673443209","https://openalex.org/W6674705169","https://openalex.org/W6679703492","https://openalex.org/W6680235470","https://openalex.org/W6683064110","https://openalex.org/W6684022048","https://openalex.org/W6712181171","https://openalex.org/W6738796088","https://openalex.org/W6785331629"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W3009457412","https://openalex.org/W2992629954","https://openalex.org/W2083819663"],"abstract_inverted_index":{"We":[0,47,286],"discuss":[1,288],"the":[2,15,34,57,65,69,101,113,118,137,152,158,195,215,230,237,241,249,254,268,283,295,302,313,325,370,404],"solution":[3],"of":[4,17,37,54,100,115,140,154,220,232,243,251,273,304,321,346,353,372,396,406],"complex":[5,384],"multistage":[6],"decision":[7],"problems":[8,135,156],"using":[9],"methods":[10,133],"that":[11,109,177,204,262,293],"are":[12,399,423],"based":[13,402],"on":[14,108,129,173,403],"idea":[16],"policy":[18,26,44,59,67,98,348,407,436],"iteration":[19,62],"(PI),":[20],"i.e.,":[21],"start":[22],"from":[23,214],"some":[24,211],"base":[25,66,97,207,284,435],"and":[27,86,131,165,168,329,338,408,419],"generate":[28],"an":[29,189,278,361,391,440],"improved":[30,43,279],"policy.":[31,110,285,393],"Rollout":[32],"is":[33,45,87,112,151,309,379],"simplest":[35],"method":[36],"this":[38,125],"type,":[39],"where":[40,56,136,157],"just":[41],"one":[42,99,352],"generated.":[46],"can":[48,82,94,429],"view":[49],"PI":[50,340,355,376,413],"as":[51,64,96],"repeated":[52],"application":[53],"rollout,":[55],"rollout":[58,76,93,130,202,239,265,291,443],"at":[60,192,224],"each":[61,143],"serves":[63],"for":[68,89,134,437],"next":[70],"iteration.":[71],"In":[72,124],"contrast":[73],"with":[74,183,210,229,248,369],"PI,":[75,105],"has":[77,267],"a":[78,147,161,166,174,200,206,343,367,382,434],"robustness":[79],"property:":[80],"it":[81,276],"be":[83,430],"applied":[84],"on-line":[85,90,319,441],"suitable":[88],"replanning.":[91],"Moreover,":[92],"use":[95,303,405,438],"policies":[102],"produced":[103],"by":[104,146],"thereby":[106],"improving":[107],"This":[111,150],"type":[114,345],"scheme":[116],"underlying":[117],"prominently":[119],"successful":[120],"AlphaZero":[121],"chess":[122],"program.":[123],"paper":[126],"we":[127,187,260,334,357,387],"focus":[128],"PI-like":[132],"control":[138,180,322],"consists":[139],"multiple":[141],"components":[142],"selected":[144],"(conceptually)":[145],"separate":[148],"agent.":[149],"class":[153],"multiagent":[155,264,290,442],"agents":[159,196,296],"have":[160],"shared":[162,167],"objective":[163],"function,":[164],"perfect":[169],"state":[170,184,385],"information.":[171],"Based":[172],"problem":[175],"reformulation":[176],"trades":[178],"off":[179],"space":[181,185],"complexity":[182],"complexity,":[186],"develop":[188],"approach,":[190],"whereby":[191],"every":[193,225],"stage,":[194],"sequentially":[197],"(one-at-a-time)":[198],"execute":[199],"local":[201],"algorithm":[203,266],"uses":[205],"policy,":[208,364],"together":[209],"coordinating":[212],"information":[213],"other":[216,330],"agents.":[217,233,252,326],"The":[218],"amount":[219,242],"total":[221,244],"computation":[222,245],"required":[223,258],"stage":[226],"grows":[227,246],"linearly":[228],"number":[231,250],"By":[234],"contrast,":[235],"in":[236,257,415,439],"standard":[238,274],"algorithm,":[240,377],"exponentially":[247],"Despite":[253],"dramatic":[255],"reduction":[256],"computation,":[259],"show":[261],"our":[263,354],"fundamental":[269],"cost":[270,314],"improvement":[271,315,349],"property":[272],"rollout:":[275],"guarantees":[277],"performance":[280],"relative":[281],"to":[282,297,311,360,390,432],"also":[287,335,400],"autonomous":[289],"schemes":[292],"allow":[294],"make":[298],"decisions":[299],"autonomously":[300],"through":[301],"precomputed":[305],"signaling":[306],"information,":[307],"which":[308,378],"sufficient":[310],"maintain":[312],"property,":[316],"without":[317],"any":[318],"coordination":[320],"selection":[323],"between":[324],"For":[327,351,374],"discounted":[328],"infinite":[331],"horizon":[332],"problems,":[333],"consider":[336],"exact":[337,418],"approximate":[339,421],"algorithms":[341,398],"involving":[342],"new":[344],"one-agent-at-a-time":[347],"operation.":[350],"algorithms,":[356,414],"prove":[358,388],"convergence":[359,389],"agent-by-agent":[362],"optimal":[363,392],"thus":[365],"establishing":[366],"connection":[368],"theory":[371],"teams.":[373],"another":[375],"executed":[380],"over":[381],"more":[383],"space,":[386],"Approximate":[394],"forms":[395],"these":[397],"given,":[401],"value":[409],"neural":[410],"networks.":[411],"These":[412],"both":[416],"their":[417,420],"form":[422],"strictly":[424],"off-line":[425],"methods,":[426],"but":[427],"they":[428],"used":[431],"provide":[433],"scheme.":[444]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":22},{"year":2021,"cited_by_count":15}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
