{"id":"https://openalex.org/W2152520382","doi":"https://doi.org/10.1109/fuzzy.2007.4295497","title":"Fuzzy Approximation for Convergent Model-Based Reinforcement Learning","display_name":"Fuzzy Approximation for Convergent Model-Based Reinforcement Learning","publication_year":2007,"publication_date":"2007-06-01","ids":{"openalex":"https://openalex.org/W2152520382","doi":"https://doi.org/10.1109/fuzzy.2007.4295497","mag":"2152520382"},"language":"en","primary_location":{"id":"doi:10.1109/fuzzy.2007.4295497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzzy.2007.4295497","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 IEEE International Fuzzy Systems Conference","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://orbi.uliege.be/handle/2268/13545","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058935509","display_name":"Lucian Bu\u015foniu","orcid":"https://orcid.org/0000-0001-8017-1296"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"L. Busoniu","raw_affiliation_strings":["Center for Systems and Control, Delft University of Technnology, Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Systems and Control, Delft University of Technnology, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077011518","display_name":"Damien Ernst","orcid":"https://orcid.org/0000-0002-3035-8260"},"institutions":[{"id":"https://openalex.org/I102475099","display_name":"Sup\u00e9lec","ror":"https://ror.org/00n7gwn90","country_code":"FR","type":"education","lineage":["https://openalex.org/I102475099"]},{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["FR","NL"],"is_corresponding":false,"raw_author_name":"D. Ernst","raw_affiliation_strings":["Center for Systems and Control of the Delft University of Techno-logy, The Netherlands","port Technology Department of TU Delft. Damien Ernst is with Sup\u00e9lec, Rennes, France"],"affiliations":[{"raw_affiliation_string":"Center for Systems and Control of the Delft University of Techno-logy, The Netherlands","institution_ids":["https://openalex.org/I98358874"]},{"raw_affiliation_string":"port Technology Department of TU Delft. Damien Ernst is with Sup\u00e9lec, Rennes, France","institution_ids":["https://openalex.org/I102475099"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026547410","display_name":"Bart De Schutter","orcid":"https://orcid.org/0000-0001-9867-6196"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"B. De Schutter","raw_affiliation_strings":["Department of Marine and Transport Technology, Technical University Delft, Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Marine and Transport Technology, Technical University Delft, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084264842","display_name":"Robert Babu\u0161ka","orcid":"https://orcid.org/0000-0001-9578-8598"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"R. Babuska","raw_affiliation_strings":["Center for Systems and Control, Delft University\uc2a0of\uc2a0Technology, Netherlands"],"affiliations":[{"raw_affiliation_string":"Center for Systems and Control, Delft University\uc2a0of\uc2a0Technology, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058935509"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":2.4182,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.9055764,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10621","display_name":"Gene Regulatory Network Analysis","score":0.9514999985694885,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7206630706787109},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6749313473701477},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5923198461532593},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5761587023735046},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5006449222564697},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.4975424110889435},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.4932350516319275},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4585123062133789},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4552733302116394},{"id":"https://openalex.org/keywords/fuzzy-control-system","display_name":"Fuzzy control system","score":0.4220818877220154},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3466125726699829},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.290702760219574}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7206630706787109},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6749313473701477},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5923198461532593},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5761587023735046},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5006449222564697},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.4975424110889435},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.4932350516319275},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4585123062133789},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4552733302116394},{"id":"https://openalex.org/C195975749","wikidata":"https://www.wikidata.org/wiki/Q1475705","display_name":"Fuzzy control system","level":3,"score":0.4220818877220154},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3466125726699829},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.290702760219574},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/fuzzy.2007.4295497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzzy.2007.4295497","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 IEEE International Fuzzy Systems Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-00202779v1","is_oa":false,"landing_page_url":"https://hal.science/hal-00202779","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2007 IEEE International Conference on Fuzzy Systems, Jul 2007, Londres, United Kingdom. pp.CDROM","raw_type":"Conference papers"},{"id":"pmh:oai:orbi.ulg.ac.be:2268/13545","is_oa":true,"landing_page_url":"https://orbi.uliege.be/handle/2268/13545","pdf_url":null,"source":{"id":"https://openalex.org/S4306400651","display_name":"Open Repository and Bibliography (University of Li\u00e8ge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I157674565","host_organization_name":"University of Li\u00e8ge","host_organization_lineage":["https://openalex.org/I157674565"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 2007 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE-07) (2007); 2007 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE-07), London, United Kingdom [GB], 23-26 July 2007","raw_type":"peer reviewed"}],"best_oa_location":{"id":"pmh:oai:orbi.ulg.ac.be:2268/13545","is_oa":true,"landing_page_url":"https://orbi.uliege.be/handle/2268/13545","pdf_url":null,"source":{"id":"https://openalex.org/S4306400651","display_name":"Open Repository and Bibliography (University of Li\u00e8ge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I157674565","host_organization_name":"University of Li\u00e8ge","host_organization_lineage":["https://openalex.org/I157674565"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 2007 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE-07) (2007); 2007 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE-07), London, United Kingdom [GB], 23-26 July 2007","raw_type":"peer reviewed"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1547105496","https://openalex.org/W1550698229","https://openalex.org/W1552830313","https://openalex.org/W1590631717","https://openalex.org/W2004283713","https://openalex.org/W2098432798","https://openalex.org/W2107726111","https://openalex.org/W2107977784","https://openalex.org/W2120346334","https://openalex.org/W2121863487","https://openalex.org/W2125074935","https://openalex.org/W2125510930","https://openalex.org/W2143490508","https://openalex.org/W2150999967","https://openalex.org/W2158316397","https://openalex.org/W2164056559","https://openalex.org/W2166760422","https://openalex.org/W2169982856","https://openalex.org/W2266260776","https://openalex.org/W2305205647","https://openalex.org/W2586680856","https://openalex.org/W3101639353","https://openalex.org/W4214717370","https://openalex.org/W4245296547","https://openalex.org/W4285719527","https://openalex.org/W6674995601","https://openalex.org/W6677737365","https://openalex.org/W6678545291","https://openalex.org/W6678900246","https://openalex.org/W6684199269","https://openalex.org/W6693743473","https://openalex.org/W6785390088"],"related_works":["https://openalex.org/W2075768550","https://openalex.org/W3022218857","https://openalex.org/W2369178846","https://openalex.org/W4306904969","https://openalex.org/W2370289839","https://openalex.org/W2082716031","https://openalex.org/W2933494595","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,5],"(RL)":[2],"is":[3,100,104,126,142],"a":[4,85,118],"control":[6,26],"paradigm":[7],"that":[8,22,96,125],"provides":[9],"well-understood":[10],"algorithms":[11,20],"with":[12],"good":[13],"convergence":[14,73],"and":[15,25,49,63,94],"consistency":[16],"properties.":[17],"Unfortunately,":[18],"these":[19,59],"require":[21],"process":[23],"states":[24,48],"actions":[27,52],"take":[28],"only":[29],"discrete":[30],"values.":[31],"Approximate":[32],"solutions":[33,62],"using":[34],"fuzzy":[35,86],"representations":[36],"have":[37],"been":[38,77],"proposed":[39],"in":[40,112],"the":[41,44,47,51,56,64,90,97,123,135],"literature":[42],"for":[43,89],"case":[45],"when":[46],"possibly":[50],"are":[53],"continuous.":[54],"However,":[55],"link":[57],"between":[58],"mainly":[60],"heuristic":[61],"larger":[65],"body":[66],"of":[67,109,122],"work":[68],"on":[69,106],"approximate":[70,113],"RL,":[71],"including":[72],"results,":[74],"has":[75],"not":[76],"made":[78],"explicit.":[79],"In":[80],"this":[81],"paper,":[82],"we":[83],"propose":[84,117],"approximation":[87],"structure":[88],"Q-value":[91],"iteration":[92],"algorithm,":[93],"show":[95],"resulting":[98],"algorithm":[99,124],"convergent.":[101],"The":[102],"proof":[103],"based":[105],"an":[107],"extension":[108],"previous":[110],"results":[111],"RL.":[114],"We":[115],"then":[116],"modified,":[119],"serial":[120],"version":[121],"guaranteed":[127],"to":[128],"converge":[129],"at":[130],"least":[131],"as":[132,134],"fast":[133],"original":[136],"algorithm.":[137],"An":[138],"illustrative":[139],"simulation":[140],"example":[141],"also":[143],"provided.":[144]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
