{"id":"https://openalex.org/W4412567561","doi":"https://doi.org/10.1109/tai.2025.3591082","title":"Demystifying MuZero Planning: Interpreting the Learned Model","display_name":"Demystifying MuZero Planning: Interpreting the Learned Model","publication_year":2025,"publication_date":"2025-07-22","ids":{"openalex":"https://openalex.org/W4412567561","doi":"https://doi.org/10.1109/tai.2025.3591082"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2025.3591082","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3591082","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018556786","display_name":"Hung Guei","orcid":"https://orcid.org/0000-0002-5590-7529"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Hung Guei","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005899189","display_name":"Yanbing Ju","orcid":"https://orcid.org/0000-0003-4188-4343"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yan-Ru Ju","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373305","display_name":"Wei-Yu Chen","orcid":"https://orcid.org/0000-0003-2958-8437"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Yu Chen","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027984982","display_name":"Ti-Rong Wu","orcid":"https://orcid.org/0000-0002-7532-3176"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ti-Rong Wu","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018556786"],"corresponding_institution_ids":["https://openalex.org/I4210098366"],"apc_list":null,"apc_paid":null,"fwci":2.0662,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87931797,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"7","issue":"2","first_page":"1025","last_page":"1036"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.13089999556541443,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11810","display_name":"Complex Systems and Decision Making","score":0.13089999556541443,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4862914979457855},{"id":"https://openalex.org/keywords/management-science","display_name":"Management science","score":0.32676178216934204},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1704554259777069}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4862914979457855},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.32676178216934204},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1704554259777069}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2025.3591082","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3591082","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.5,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1625390266","https://openalex.org/W1714211023","https://openalex.org/W2126316555","https://openalex.org/W2168405694","https://openalex.org/W2194775991","https://openalex.org/W2902907165","https://openalex.org/W2982316857","https://openalex.org/W2989847975","https://openalex.org/W3171007011","https://openalex.org/W3199069166","https://openalex.org/W4249244053","https://openalex.org/W4396531193","https://openalex.org/W4403488246","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"MuZero":[0,59,101,138,154],"has":[1],"achieved":[2],"superhuman":[3],"performance":[4],"in":[5,123,127],"various":[6],"games":[7,125],"by":[8,29,45,105],"using":[9,106],"a":[10,134],"dynamics":[11,17,31,93,117],"network":[12,32,94,118],"to":[13,41,66,108,133,145],"predict":[14],"the":[15,25,30,47,92,116,147,153],"environment":[16],"for":[18,142],"planning,":[19],"without":[20],"relying":[21],"on":[22],"simulators.":[23],"However,":[24],"latent":[26,49,68,121],"states":[27,69,122],"learned":[28,48],"make":[33],"its":[34],"planning":[35,107],"process":[36],"opaque.":[37],"This":[38],"paper":[39],"aims":[40],"demystify":[42],"MuZero\u2019s":[43],"model":[44],"interpreting":[46],"states.":[50],"We":[51],"incorporate":[52],"observation":[53],"reconstruction":[54],"and":[55,61,76,78,85,139,150,158],"state":[56],"consistency":[57],"into":[58],"training":[60],"conduct":[62],"an":[63],"in-depth":[64],"analysis":[65],"evaluate":[67],"across":[70],"two":[71],"board":[72,124],"games:":[73,81],"9x9":[74],"Go":[75],"Gomoku,":[77],"three":[79],"Atari":[80,128],"Breakout,":[82],"Ms.":[83],"Pacman,":[84],"Pong.":[86],"Our":[87,111],"findings":[88],"reveal":[89],"that":[90,115],"while":[91],"becomes":[95],"less":[96],"accurate":[97],"over":[98],"longer":[99],"simulations,":[100],"still":[102],"performs":[103],"effectively":[104],"correct":[109],"errors.":[110],"experiments":[112],"also":[113],"show":[114],"learns":[119],"better":[120,135],"than":[126],"games.":[129],"These":[130],"insights":[131],"contribute":[132],"understanding":[136],"of":[137,152],"offer":[140],"directions":[141],"future":[143],"research":[144],"improve":[146],"performance,":[148],"robustness,":[149],"interpretability":[151],"algorithm.":[155],"The":[156],"code":[157],"data":[159],"are":[160],"available":[161],"at":[162],"<uri":[163],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[164],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://rlg.iis.sinica.edu.tw/papers/demystifying-muzero-planning</uri>.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
