{"id":"https://openalex.org/W2886498421","doi":"https://doi.org/10.23919/wac.2018.8430409","title":"Multi-Agent Exploration for Faster and Reliable Deep Q-Learning Convergence in Reinforcement Learning","display_name":"Multi-Agent Exploration for Faster and Reliable Deep Q-Learning Convergence in Reinforcement Learning","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2886498421","doi":"https://doi.org/10.23919/wac.2018.8430409","mag":"2886498421"},"language":"en","primary_location":{"id":"doi:10.23919/wac.2018.8430409","is_oa":false,"landing_page_url":"https://doi.org/10.23919/wac.2018.8430409","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 World Automation Congress (WAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://figshare.com/articles/conference_contribution/Multi-Agent_Exploration_for_Faster_and_Reliable_Deep_Q-Learning_Convergence_in_Reinforcement_Learning/20595237","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067604730","display_name":"Abhijit Majumdar","orcid":"https://orcid.org/0000-0002-0353-1849"},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Abhijit Majumdar","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX","institution_ids":["https://openalex.org/I45438204"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089777417","display_name":"Patrick Benavidez","orcid":"https://orcid.org/0000-0001-5544-6489"},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Benavidez","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX","institution_ids":["https://openalex.org/I45438204"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113537394","display_name":"Mo Jamshidi","orcid":null},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mo Jamshidi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at San Antonio, San Antonio, TX","institution_ids":["https://openalex.org/I45438204"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5067604730"],"corresponding_institution_ids":["https://openalex.org/I45438204"],"apc_list":null,"apc_paid":null,"fwci":1.0153,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82688363,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8919812440872192},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7672882080078125},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7503723502159119},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6681832671165466},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.591858446598053},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5857284069061279},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.5658784508705139},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5599352121353149},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.513654887676239},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4975288212299347},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.47187867760658264},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4432125389575958}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8919812440872192},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7672882080078125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7503723502159119},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6681832671165466},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.591858446598053},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5857284069061279},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.5658784508705139},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5599352121353149},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.513654887676239},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4975288212299347},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.47187867760658264},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4432125389575958},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/wac.2018.8430409","is_oa":false,"landing_page_url":"https://doi.org/10.23919/wac.2018.8430409","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 World Automation Congress (WAC)","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/20595237","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Multi-Agent_Exploration_for_Faster_and_Reliable_Deep_Q-Learning_Convergence_in_Reinforcement_Learning/20595237","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/20595237","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Multi-Agent_Exploration_for_Faster_and_Reliable_Deep_Q-Learning_Convergence_in_Reinforcement_Learning/20595237","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1515851193","https://openalex.org/W1552169927","https://openalex.org/W1806891645","https://openalex.org/W1949003493","https://openalex.org/W2097498347","https://openalex.org/W2108892923","https://openalex.org/W2120346334","https://openalex.org/W2121092017","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2173564293","https://openalex.org/W2201581102","https://openalex.org/W2395575420","https://openalex.org/W2560647685","https://openalex.org/W2592798481","https://openalex.org/W2594794854","https://openalex.org/W2746553466","https://openalex.org/W2885164411","https://openalex.org/W2949201811","https://openalex.org/W2951799221","https://openalex.org/W2951896791","https://openalex.org/W2962938178","https://openalex.org/W2962990769","https://openalex.org/W2963000099","https://openalex.org/W2963095800","https://openalex.org/W2963407617","https://openalex.org/W2963477884","https://openalex.org/W2963658727","https://openalex.org/W2964001908","https://openalex.org/W2964345382","https://openalex.org/W3023539457","https://openalex.org/W4299802797","https://openalex.org/W4300799055","https://openalex.org/W4301311528","https://openalex.org/W4302379208","https://openalex.org/W6638520917","https://openalex.org/W6640746738","https://openalex.org/W6674705169","https://openalex.org/W6676014644","https://openalex.org/W6677737365","https://openalex.org/W6677939520","https://openalex.org/W6685388067","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6712181171","https://openalex.org/W6726878482","https://openalex.org/W6730111887","https://openalex.org/W6734678876","https://openalex.org/W6735011893","https://openalex.org/W6736572398","https://openalex.org/W6738796088","https://openalex.org/W6740801417"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W4377865163","https://openalex.org/W2089415692"],"abstract_inverted_index":{"Function":[0],"approximation":[1],"based":[2],"Q-learning,":[3],"using":[4,27],"deep":[5,29],"q-learning":[6],"has":[7],"had":[8],"recent":[9],"extraordinary":[10],"developments":[11],"applicable":[12],"to":[13,21,47,61],"generalized":[14],"applications.":[15],"Many":[16],"techniques":[17],"have":[18],"been":[19],"introduced":[20],"counter":[22],"the":[23,37],"inherent":[24],"caveats":[25],"in":[26,32],"a":[28],"neural":[30],"network":[31],"reinforcement":[33],"learning.":[34],"We":[35],"demonstrate":[36],"use":[38],"of":[39],"multi-agent":[40],"virtual":[41],"exploration":[42],"integrated":[43],"into":[44],"existing":[45],"algorithms":[46],"show":[48,53],"better":[49,65],"convergence":[50],"property,":[51],"and":[52,64],"how":[54],"they":[55],"can":[56],"be":[57],"applied":[58],"as":[59],"extensions":[60],"provide":[62],"faster":[63],"converged":[66],"values.":[67]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}