{"id":"https://openalex.org/W4399620467","doi":"https://doi.org/10.1109/iwcit62550.2024.10552959","title":"Deep ExRL: Experience-Driven Deep Reinforcement Learning in Control Problems","display_name":"Deep ExRL: Experience-Driven Deep Reinforcement Learning in Control Problems","publication_year":2024,"publication_date":"2024-05-01","ids":{"openalex":"https://openalex.org/W4399620467","doi":"https://doi.org/10.1109/iwcit62550.2024.10552959"},"language":"en","primary_location":{"id":"doi:10.1109/iwcit62550.2024.10552959","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwcit62550.2024.10552959","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 12th Iran Workshop on Communication and Information Theory (IWCIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034259286","display_name":"Ali Ghandi","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Ali Ghandi","raw_affiliation_strings":["Sharif University of Technology,EE,Tehran,Iran","EE, Sharif University of Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Sharif University of Technology,EE,Tehran,Iran","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"EE, Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088539856","display_name":"Saeed Bagheri Shouraki","orcid":"https://orcid.org/0000-0002-7715-8004"},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Saeed Bagheri Shouraki","raw_affiliation_strings":["Sharif University of Technology,EE,Tehran,Iran","EE, Sharif University of Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Sharif University of Technology,EE,Tehran,Iran","institution_ids":["https://openalex.org/I133529467"]},{"raw_affiliation_string":"EE, Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068767700","display_name":"M. Riazati","orcid":"https://orcid.org/0009-0000-6528-2705"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mahyar Riazati","raw_affiliation_strings":["University of Tehran,ECE,Tehran,Iran","ECE, University of Tehran, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"University of Tehran,ECE,Tehran,Iran","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"ECE, University of Tehran, Tehran, Iran","institution_ids":["https://openalex.org/I23946033"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5034259286"],"corresponding_institution_ids":["https://openalex.org/I133529467"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06677915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.815527081489563},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.696121335029602},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5209314227104187},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.49091464281082153},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4618386924266815}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.815527081489563},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.696121335029602},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5209314227104187},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.49091464281082153},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4618386924266815}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwcit62550.2024.10552959","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwcit62550.2024.10552959","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 12th Iran Workshop on Communication and Information Theory (IWCIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.550000011920929,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1556824961","https://openalex.org/W2005900670","https://openalex.org/W2202549229","https://openalex.org/W2440926996","https://openalex.org/W2501216414","https://openalex.org/W2604763608","https://openalex.org/W2604960773","https://openalex.org/W2735995851","https://openalex.org/W2736601468","https://openalex.org/W2949600457","https://openalex.org/W2951871955","https://openalex.org/W2964043796","https://openalex.org/W3011574394","https://openalex.org/W3042393254","https://openalex.org/W3088409176","https://openalex.org/W3125178068","https://openalex.org/W3147254695","https://openalex.org/W3160182403","https://openalex.org/W4205892261","https://openalex.org/W4214717370","https://openalex.org/W4224944611","https://openalex.org/W4240592325","https://openalex.org/W4285811004","https://openalex.org/W4288094104","https://openalex.org/W4317670814","https://openalex.org/W4324044667","https://openalex.org/W4383112908"],"related_works":["https://openalex.org/W2731899572","https://openalex.org/W3215138031","https://openalex.org/W3009238340","https://openalex.org/W4321369474","https://openalex.org/W4360585206","https://openalex.org/W4285208911","https://openalex.org/W3082895349","https://openalex.org/W4213079790","https://openalex.org/W2248239756","https://openalex.org/W4323565446"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"has":[3],"demonstrated":[4],"significant":[5],"accomplishments":[6],"in":[7,33,108,116],"various":[8],"fields":[9],"but":[10],"struggles":[11],"with":[12],"adaptability.":[13],"A":[14],"common":[15],"problem":[16,97],"emerges":[17],"when":[18],"an":[19],"agent":[20],"trained":[21],"to":[22,27,57,60],"ascend":[23],"a":[24,88,94,99,105,113],"hill":[25],"fails":[26],"maintain":[28],"its":[29],"performance":[30],"under":[31],"changes":[32],"conditions":[34],"such":[35],"as":[36,98],"gravity":[37],"or":[38,87],"slope.":[39],"Our":[40],"study":[41],"focuses":[42],"on":[43,93],"on-policy":[44],"deep":[45],"RL":[46],"algorithms":[47],"and":[48,71,112],"enhances":[49],"them":[50],"by":[51],"allowing":[52],"the":[53,73,84],"abstraction":[54],"of":[55],"experiences":[56],"be":[58],"transferred":[59],"new":[61],"agents.":[62],"This":[63,79],"approach":[64],"facilitates":[65],"accelerated":[66],"learning":[67],"through":[68],"improved":[69],"exploration":[70],"reduces":[72],"need":[74],"for":[75],"exhaustive":[76],"hyperparameter":[77],"optimization.":[78],"transfer":[80],"can":[81],"occur":[82],"from":[83],"same":[85],"environment":[86],"similar":[89],"one.":[90],"We":[91],"focus":[92],"simple":[95],"control":[96],"case":[100],"study.":[101],"The":[102],"results":[103],"show":[104],"50%":[106],"increase":[107],"training":[109],"success":[110],"rate":[111],"15%":[114],"boost":[115],"sample":[117],"efficiency.":[118]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
