{"id":"https://openalex.org/W4406461610","doi":"https://doi.org/10.1109/bigdata62323.2024.10825395","title":"Quality-Aware Experience Exploitation in Model-Based Reinforcement Learning","display_name":"Quality-Aware Experience Exploitation in Model-Based Reinforcement Learning","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406461610","doi":"https://doi.org/10.1109/bigdata62323.2024.10825395"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825395","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100436459","display_name":"Guang Yang","orcid":"https://orcid.org/0000-0001-5622-1679"},"institutions":[{"id":"https://openalex.org/I184840846","display_name":"Virginia Commonwealth University","ror":"https://ror.org/02nkdxk79","country_code":"US","type":"education","lineage":["https://openalex.org/I184840846"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Guang Yang","raw_affiliation_strings":["Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA"],"affiliations":[{"raw_affiliation_string":"Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA","institution_ids":["https://openalex.org/I184840846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102013440","display_name":"Jiahe Li","orcid":"https://orcid.org/0009-0003-5092-1326"},"institutions":[{"id":"https://openalex.org/I184840846","display_name":"Virginia Commonwealth University","ror":"https://ror.org/02nkdxk79","country_code":"US","type":"education","lineage":["https://openalex.org/I184840846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiahe Li","raw_affiliation_strings":["Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA"],"affiliations":[{"raw_affiliation_string":"Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA","institution_ids":["https://openalex.org/I184840846"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039576301","display_name":"Ziye Geng","orcid":null},"institutions":[{"id":"https://openalex.org/I184840846","display_name":"Virginia Commonwealth University","ror":"https://ror.org/02nkdxk79","country_code":"US","type":"education","lineage":["https://openalex.org/I184840846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziye Geng","raw_affiliation_strings":["Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA"],"affiliations":[{"raw_affiliation_string":"Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA","institution_ids":["https://openalex.org/I184840846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108048105","display_name":"Changqing Luo","orcid":"https://orcid.org/0000-0003-0875-9380"},"institutions":[{"id":"https://openalex.org/I184840846","display_name":"Virginia Commonwealth University","ror":"https://ror.org/02nkdxk79","country_code":"US","type":"education","lineage":["https://openalex.org/I184840846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changqing Luo","raw_affiliation_strings":["Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA"],"affiliations":[{"raw_affiliation_string":"Virginia Commonwealth University,Dept. of Computer Science,Richmond,VA,USA","institution_ids":["https://openalex.org/I184840846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100436459"],"corresponding_institution_ids":["https://openalex.org/I184840846"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23164454,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1161","last_page":"1166"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.810758113861084},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7259095907211304},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.573197603225708},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36501118540763855}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.810758113861084},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7259095907211304},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.573197603225708},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36501118540763855},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825395","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1980035368","https://openalex.org/W2492794003","https://openalex.org/W2596585349","https://openalex.org/W2890208753","https://openalex.org/W2890960027","https://openalex.org/W2911495555","https://openalex.org/W2913954081","https://openalex.org/W2914304175","https://openalex.org/W3036619998","https://openalex.org/W3038822267","https://openalex.org/W3175558129","https://openalex.org/W4214717370","https://openalex.org/W4221138464","https://openalex.org/W4252279978","https://openalex.org/W4391259941","https://openalex.org/W6677067356","https://openalex.org/W6683195989","https://openalex.org/W6747473740","https://openalex.org/W6748519856","https://openalex.org/W6748839928","https://openalex.org/W6751087324","https://openalex.org/W6751494529","https://openalex.org/W6753183898","https://openalex.org/W6753264383","https://openalex.org/W6754184789","https://openalex.org/W6754302096","https://openalex.org/W6764053384","https://openalex.org/W6779142360","https://openalex.org/W6780856455","https://openalex.org/W6783814031","https://openalex.org/W6783909840","https://openalex.org/W6783988234","https://openalex.org/W6809631584"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109"],"abstract_inverted_index":{"In":[0],"model-based":[1,78],"reinforcement":[2],"learning":[3,45,99,138],"(MBRL),":[4],"the":[5,25,37,75,92,108,116,121],"quality":[6,39,72,109],"of":[7,28,40,77,94,124,127],"simulated":[8,30,66,95,144],"experiences":[9,31,42,67,96],"is":[10,110],"a":[11,55,84],"critical":[12],"bottleneck":[13],"to":[14,73,89],"effective":[15],"policy":[16,44,79,98,137],"learning.":[17,80],"Existing":[18],"research":[19],"has":[20,33],"primarily":[21],"focused":[22],"on":[23,69,97,101],"reducing":[24],"generation":[26],"errors":[27],"these":[29,41],"but":[32],"largely":[34],"ignored":[35],"how":[36],"varying":[38],"impacts":[43],"during":[46],"their":[47,70,102],"exploitation.":[48],"To":[49],"bridge":[50],"this":[51],"gap,":[52],"we":[53,82],"propose":[54],"novel":[56],"quality-aware":[57],"experience":[58],"exploitation":[59],"scheme,":[60],"called":[61],"QA2E,":[62],"which":[63],"dynamically":[64,90],"exploits":[65],"based":[68,100],"assessed":[71,103],"enhance":[74],"effectiveness":[76],"Particularly,":[81],"develop":[83],"weighted":[85],"Bellman":[86],"backup":[87],"approach":[88],"adjust":[91],"influence":[93],"quality.":[104],"Since":[105],"directly":[106],"measuring":[107],"impractical,":[111],"QA2E":[112,134],"estimates":[113],"it":[114],"through":[115],"epistemic":[117],"uncertainty":[118],"derived":[119],"from":[120],"prediction":[122],"results":[123,131],"an":[125],"ensemble":[126],"transition":[128],"models.":[129],"Experimental":[130],"demonstrate":[132],"that":[133],"significantly":[135],"improves":[136],"performance":[139],"by":[140],"more":[141],"effectively":[142],"exploiting":[143],"experiences.":[145]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
