{"id":"https://openalex.org/W4372259970","doi":"https://doi.org/10.1109/icassp49357.2023.10095236","title":"MEET: A Monte Carlo Exploration-Exploitation Trade-Off for Buffer Sampling","display_name":"MEET: A Monte Carlo Exploration-Exploitation Trade-Off for Buffer Sampling","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372259970","doi":"https://doi.org/10.1109/icassp49357.2023.10095236"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095236","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095236","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034670462","display_name":"Julius Ott","orcid":"https://orcid.org/0000-0001-8259-3070"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["CA","DE"],"is_corresponding":false,"raw_author_name":"Julius Ott","raw_affiliation_strings":["Infineon Technologies AG","Technical University of Munich"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]},{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015292011","display_name":"Lorenzo Servadei","orcid":"https://orcid.org/0000-0003-4322-834X"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["CA","DE"],"is_corresponding":false,"raw_author_name":"Lorenzo Servadei","raw_affiliation_strings":["Infineon Technologies AG","Technical University of Munich"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]},{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039286688","display_name":"Jose A. Arjona-Medina","orcid":"https://orcid.org/0000-0002-5033-4725"},"institutions":[{"id":"https://openalex.org/I121883995","display_name":"Johannes Kepler University of Linz","ror":"https://ror.org/052r2xn60","country_code":"AT","type":"education","lineage":["https://openalex.org/I121883995"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Jose Arjona-Medina","raw_affiliation_strings":["Johannes Kepler University Linz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Johannes Kepler University Linz","institution_ids":["https://openalex.org/I121883995"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066988696","display_name":"Enrico Rinaldi","orcid":"https://orcid.org/0000-0003-4134-809X"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Enrico Rinaldi","raw_affiliation_strings":["University of Michigan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069095760","display_name":"Gianfranco Mauro","orcid":"https://orcid.org/0000-0003-3204-1555"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Gianfranco Mauro","raw_affiliation_strings":["Infineon Technologies AG"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076455352","display_name":"Daniela S\u00e1nchez Lopera","orcid":"https://orcid.org/0000-0001-8750-7696"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]},{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["CA","DE"],"is_corresponding":false,"raw_author_name":"Daniela S\u00e1nchez Lopera","raw_affiliation_strings":["Infineon Technologies AG","Technical University of Munich"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]},{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037161101","display_name":"Michael Stephan","orcid":"https://orcid.org/0000-0003-4715-785X"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Michael Stephan","raw_affiliation_strings":["Infineon Technologies AG"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030041882","display_name":"Thomas Stadelmayer","orcid":"https://orcid.org/0000-0002-8691-6612"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Thomas Stadelmayer","raw_affiliation_strings":["Infineon Technologies AG"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045725228","display_name":"Avik Santra","orcid":"https://orcid.org/0000-0002-8156-3387"},"institutions":[{"id":"https://openalex.org/I4210140450","display_name":"Infineon Technologies (Canada)","ror":"https://ror.org/04hbev594","country_code":"CA","type":"company","lineage":["https://openalex.org/I137594350","https://openalex.org/I4210140450"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Avik Santra","raw_affiliation_strings":["Infineon Technologies AG"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Infineon Technologies AG","institution_ids":["https://openalex.org/I4210140450"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004214923","display_name":"Robert Wille","orcid":"https://orcid.org/0000-0002-4993-7860"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Robert Wille","raw_affiliation_strings":["Technical University of Munich"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technical University of Munich","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52647606,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8943076729774475},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.759231686592102},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.7468792200088501},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6544857025146484},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6315627694129944},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.5650198459625244},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5637349486351013},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5159995555877686},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.5045100450515747},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.465227335691452},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4484364092350006},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4434415400028229},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.43524855375289917},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39319366216659546},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19310086965560913},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11382749676704407},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09437727928161621},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09188464283943176}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8943076729774475},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.759231686592102},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.7468792200088501},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6544857025146484},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6315627694129944},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.5650198459625244},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5637349486351013},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5159995555877686},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.5045100450515747},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.465227335691452},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4484364092350006},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4434415400028229},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.43524855375289917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39319366216659546},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19310086965560913},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11382749676704407},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09437727928161621},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09188464283943176},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095236","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095236","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2046376809","https://openalex.org/W2108114251","https://openalex.org/W2121863487","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2201581102","https://openalex.org/W2280163991","https://openalex.org/W2600383743","https://openalex.org/W2767050701","https://openalex.org/W2774354230","https://openalex.org/W2781726626","https://openalex.org/W2798705390","https://openalex.org/W2807588596","https://openalex.org/W2809013025","https://openalex.org/W2946901134","https://openalex.org/W2950471160","https://openalex.org/W2963864421","https://openalex.org/W2965870268","https://openalex.org/W2968104655","https://openalex.org/W3034971196","https://openalex.org/W3101442004","https://openalex.org/W3105361666","https://openalex.org/W3203827806","https://openalex.org/W3214679404","https://openalex.org/W4221161302","https://openalex.org/W4289107544","https://openalex.org/W4302570325","https://openalex.org/W6676576766","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6696324988","https://openalex.org/W6735443497","https://openalex.org/W6746581380","https://openalex.org/W6747473740","https://openalex.org/W6752244597","https://openalex.org/W6757560991","https://openalex.org/W6763177082","https://openalex.org/W6780404908","https://openalex.org/W6786437897","https://openalex.org/W6809799041"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698","https://openalex.org/W2742914308","https://openalex.org/W2330004501","https://openalex.org/W2017089693","https://openalex.org/W2703295919"],"abstract_inverted_index":{"Data":[0],"selection":[1],"is":[2,74],"essential":[3],"for":[4,16,121],"any":[5],"data-based":[6],"optimization":[7],"technique,":[8],"such":[9],"as":[10],"Reinforcement":[11,26],"Learning.":[12],"State-of-the-art":[13],"sampling":[14,44,66,87,119],"strategies":[15,120],"the":[17,22,25,36,43,53,56,70,77,81,86,114],"experience":[18],"replay":[19],"buffer":[20],"improve":[21],"performance":[23,128],"of":[24,50,55,80],"Learning":[27],"agent.":[28],"However,":[29],"they":[30,40],"do":[31],"not":[32],"incorporate":[33],"uncertainty":[34,78],"in":[35],"Q-Value":[37,82],"estimation.":[38],"Consequently,":[39],"cannot":[41],"adapt":[42],"strategies,":[45],"including":[46],"exploration":[47],"and":[48,126],"exploitation":[49],"transitions,":[51],"to":[52,88],"complexity":[54],"task.":[57],"To":[58],"address":[59],"this,":[60],"this":[61],"paper":[62],"proposes":[63],"a":[64,96],"new":[65],"strategy":[67],"that":[68,113],"leverages":[69],"exploration-exploitation":[71],"trade-off.":[72],"This":[73],"enabled":[75],"by":[76,129],"estimation":[79],"function,":[83],"which":[84],"guides":[85],"explore":[89],"more":[90,97],"significant":[91],"transitions":[92],"and,":[93],"thus,":[94],"learn":[95],"efficient":[98],"policy.":[99],"Experiments":[100],"on":[101,131],"classical":[102],"control":[103],"environments":[104],"demonstrate":[105],"stable":[106],"results":[107],"across":[108],"various":[109],"environments.":[110],"They":[111],"show":[112],"proposed":[115],"method":[116],"outperforms":[117],"state-of-the-art":[118],"dense":[122],"rewards":[123],"w.r.t.":[124],"convergence":[125],"peak":[127],"26%":[130],"average.":[132]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
