{"id":"https://openalex.org/W4403488246","doi":"https://doi.org/10.3233/faia240666","title":"What Model Does MuZero Learn?","display_name":"What Model Does MuZero Learn?","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403488246","doi":"https://doi.org/10.3233/faia240666"},"language":"en","primary_location":{"id":"doi:10.3233/faia240666","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240666","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240666","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240666","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080542181","display_name":"Jinke He","orcid":null},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Jinke He","raw_affiliation_strings":["Delft University of Technology"],"affiliations":[{"raw_affiliation_string":"Delft University of Technology","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003595753","display_name":"Thomas M. Moerland","orcid":null},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Thomas M. Moerland","raw_affiliation_strings":["Leiden University"],"affiliations":[{"raw_affiliation_string":"Leiden University","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062891702","display_name":"Joery A. de Vries","orcid":null},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Joery A. de Vries","raw_affiliation_strings":["Delft University of Technology"],"affiliations":[{"raw_affiliation_string":"Delft University of Technology","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009493909","display_name":"Frans A. Oliehoek","orcid":"https://orcid.org/0000-0003-4372-5055"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Frans A. Oliehoek","raw_affiliation_strings":["Delft University of Technology"],"affiliations":[{"raw_affiliation_string":"Delft University of Technology","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080542181"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":2.7976,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.92030717,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.630299985408783,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.630299985408783,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09709999710321426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.06379999965429306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40374088287353516}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40374088287353516}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia240666","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240666","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240666","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:scholarlypublications.universiteitleiden.nl:item_4273659","is_oa":true,"landing_page_url":"https://hdl.handle.net/1887/4273659","pdf_url":"https://scholarlypublications.universiteitleiden.nl/access/item%3A4273660/view","source":{"id":"https://openalex.org/S4306400850","display_name":"Leiden Repository (Leiden University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I121797337","host_organization_name":"Leiden University","host_organization_lineage":["https://openalex.org/I121797337"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Frontiers in Artificial Intelligence and Applications -- ECAI 2024","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3233/faia240666","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240666","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240666","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320328987","display_name":"Delft Research Centres","ror":"https://ror.org/02e2c7k09"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403488246.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1,44],"learning":[2,45,54,136],"(MBRL)":[3],"has":[4,70],"drawn":[5],"considerable":[6],"interest":[7],"in":[8,63,79,99,113],"recent":[9],"years,":[10],"given":[11],"its":[12,97,135],"promise":[13],"to":[14,26,129,165,174],"improve":[15,179],"sample":[16,111],"efficiency.":[17],"Moreover,":[18],"when":[19],"using":[20],"deep-learned":[21],"models,":[22],"it":[23],"is":[24,83,107],"possible":[25],"learn":[27],"compact":[28],"and":[29,61,96,103,142],"generalizable":[30],"models":[31,148],"from":[32,50],"data.":[33],"In":[34],"this":[35,117],"work,":[36],"we":[37,119,156,176],"study":[38],"MuZero,":[39],"a":[40,55,84,139],"state-of-the-art":[41],"deep":[42],"model-based":[43,100],"algorithm":[46],"that":[47,158],"distinguishes":[48],"itself":[49],"existing":[51,68],"algorithms":[52],"by":[53,94,183],"value-equivalent":[56,91,140],"model.":[57,187],"Despite":[58],"MuZero\u2019s":[59,159],"success":[60],"impact":[62],"the":[64,90,172,180,186],"field":[65],"of":[66,86,138],"MBRL,":[67],"literature":[69],"not":[71],"thoroughly":[72],"addressed":[73],"why":[74],"MuZero":[75,95,133],"performs":[76],"so":[77],"well":[78],"practice.":[80],"Specifically,":[81],"there":[82],"lack":[85],"in-depth":[87],"investigation":[88],"into":[89],"model":[92,161],"learned":[93,160],"effectiveness":[98],"credit":[101],"assignment":[102],"policy":[104,150,182],"improvement,":[105],"which":[106,175],"vital":[108],"for":[109,149],"achieving":[110],"efficiency":[112],"MBRL.":[114],"To":[115],"fill":[116],"gap,":[118],"explore":[120],"two":[121],"fundamental":[122],"questions":[123],"through":[124],"our":[125],"empirical":[126],"analysis:":[127],"1)":[128],"what":[130],"extent":[131,173],"does":[132],"achieve":[134],"objective":[137],"model,":[141],"2)":[143],"how":[144],"useful":[145],"are":[146],"these":[147],"improvement?":[151],"Among":[152],"various":[153],"other":[154],"insights,":[155],"conclude":[157],"cannot":[162],"effectively":[163],"generalize":[164],"evaluate":[166],"unseen":[167],"policies.":[168],"This":[169],"limitation":[170],"constrains":[171],"can":[177],"additionally":[178],"current":[181],"planning":[184],"with":[185]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
