{"id":"https://openalex.org/W4312290571","doi":"https://doi.org/10.1109/mlsp55214.2022.9943376","title":"The Minimum Value State Problem in Actor-Critic Networks","display_name":"The Minimum Value State Problem in Actor-Critic Networks","publication_year":2022,"publication_date":"2022-08-22","ids":{"openalex":"https://openalex.org/W4312290571","doi":"https://doi.org/10.1109/mlsp55214.2022.9943376"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp55214.2022.9943376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55214.2022.9943376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067109059","display_name":"Alvaro Velasquez","orcid":"https://orcid.org/0000-0001-6757-105X"},"institutions":[{"id":"https://openalex.org/I1280414376","display_name":"United States Air Force Research Laboratory","ror":"https://ror.org/02e2egq70","country_code":"US","type":"facility","lineage":["https://openalex.org/I1280414376","https://openalex.org/I1330347796","https://openalex.org/I4210102105","https://openalex.org/I4389425425"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alvaro Velasquez","raw_affiliation_strings":["Information Directorate, Air Force Research Laboratory,Rome,NY,USA","Information Directorate, Air Force Research Laboratory, Rome, NY, USA"],"affiliations":[{"raw_affiliation_string":"Information Directorate, Air Force Research Laboratory,Rome,NY,USA","institution_ids":["https://openalex.org/I1280414376"]},{"raw_affiliation_string":"Information Directorate, Air Force Research Laboratory, Rome, NY, USA","institution_ids":["https://openalex.org/I1280414376"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003538946","display_name":"Ismail Alkhouri","orcid":"https://orcid.org/0000-0002-5754-5509"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ismail R. Alkhouri","raw_affiliation_strings":["University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074924704","display_name":"Brett Bissey","orcid":null},"institutions":[{"id":"https://openalex.org/I44896327","display_name":"Mitre (United States)","ror":"https://ror.org/03ks2a131","country_code":"US","type":"company","lineage":["https://openalex.org/I44896327"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brett Bissey","raw_affiliation_strings":["MITRE Corporation,McLean,VA,USA","MITRE Corporation, McLean, VA, USA"],"affiliations":[{"raw_affiliation_string":"MITRE Corporation,McLean,VA,USA","institution_ids":["https://openalex.org/I44896327"]},{"raw_affiliation_string":"MITRE Corporation, McLean, VA, USA","institution_ids":["https://openalex.org/I44896327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066664040","display_name":"Lior Barak","orcid":null},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lior Barak","raw_affiliation_strings":["University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003612688","display_name":"George Atia","orcid":"https://orcid.org/0000-0001-7958-9855"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George K. Atia","raw_affiliation_strings":["University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Electrical and Computer Engineering,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, USA","institution_ids":["https://openalex.org/I106165777"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5067109059"],"corresponding_institution_ids":["https://openalex.org/I1280414376"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13233617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"271","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7826812863349915},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7159056663513184},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.710222601890564},{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.5961796641349792},{"id":"https://openalex.org/keywords/linear-programming","display_name":"Linear programming","score":0.5719332695007324},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5412615537643433},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4922567903995514},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.47980859875679016},{"id":"https://openalex.org/keywords/integer-programming","display_name":"Integer programming","score":0.4767668843269348},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4292052984237671},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.4016137719154358},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2946288585662842},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24120929837226868},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1755017638206482}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7826812863349915},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7159056663513184},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.710222601890564},{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.5961796641349792},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.5719332695007324},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5412615537643433},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4922567903995514},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.47980859875679016},{"id":"https://openalex.org/C56086750","wikidata":"https://www.wikidata.org/wiki/Q6042592","display_name":"Integer programming","level":2,"score":0.4767668843269348},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4292052984237671},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.4016137719154358},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2946288585662842},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24120929837226868},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1755017638206482},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp55214.2022.9943376","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp55214.2022.9943376","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 32nd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G2243852676","display_name":null,"funder_award_id":"20RICOR012","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G3277782020","display_name":null,"funder_award_id":"FA8750-20-3-1004","funder_id":"https://openalex.org/F4320338294","funder_display_name":"Air Force Research Laboratory"},{"id":"https://openalex.org/G3736233273","display_name":null,"funder_award_id":"DE-EE0009152","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"},{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1836465849","https://openalex.org/W1945616565","https://openalex.org/W2257979135","https://openalex.org/W2269778407","https://openalex.org/W2620038827","https://openalex.org/W2721006554","https://openalex.org/W2766447205","https://openalex.org/W2772709170","https://openalex.org/W2773525213","https://openalex.org/W2963744840","https://openalex.org/W2989847975","https://openalex.org/W3090386093","https://openalex.org/W3118210634","https://openalex.org/W4293846201","https://openalex.org/W4294349862","https://openalex.org/W4298857966","https://openalex.org/W4300568640","https://openalex.org/W4301187031","https://openalex.org/W6637967152","https://openalex.org/W6638667902","https://openalex.org/W6640425456","https://openalex.org/W6711870810","https://openalex.org/W6739868092","https://openalex.org/W6740406300","https://openalex.org/W6746177919","https://openalex.org/W6747027214","https://openalex.org/W6751834733","https://openalex.org/W6783838878"],"related_works":["https://openalex.org/W2768698792","https://openalex.org/W2951976120","https://openalex.org/W2983685817","https://openalex.org/W3136325136","https://openalex.org/W4285278887","https://openalex.org/W2995925505","https://openalex.org/W134783735","https://openalex.org/W2365237642","https://openalex.org/W3116484972","https://openalex.org/W4232431455"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"methods":[4],"are":[5],"vulnerable":[6],"to":[7,19,28,53,61,114,132],"adversarially":[8],"perturbed":[9],"states.":[10,122],"Though":[11],"the":[12,72,82,116,125],"nature":[13],"of":[14,50,75,84],"such":[15,30,145],"states":[16,32,56,109],"is":[17,39,96],"difficult":[18],"characterize,":[20],"popular":[21],"actor-critic":[22,51],"architectures":[23,52],"provide":[24],"a":[25,63,86,90,100],"natural":[26],"way":[27],"detect":[29],"pathological":[31,121],"by":[33,128],"determining":[34],"whether":[35],"their":[36],"value":[37],"head":[38],"below":[40],"some":[41],"value.":[42],"In":[43],"this":[44,48],"paper,":[45],"we":[46,70],"leverage":[47],"capacity":[49],"generate":[54],"low-value":[55,87],"that":[57,94],"can":[58,110],"be":[59,112],"used":[60,113],"define":[62],"training":[64,117],"regimen":[65],"for":[66,77,89],"RL":[67,78],"algorithms,":[68],"which":[69],"call":[71],"Pathological":[73],"Ensemble":[74],"Actions":[76],"(PEARL).":[79],"We":[80,123],"pose":[81],"problem":[83],"synthesizing":[85],"state":[88],"given":[91],"architecture,":[92],"prove":[93],"it":[95],"NP-hard,":[97],"and":[98,137,148],"present":[99],"solution":[101],"based":[102],"on":[103,142],"integer":[104],"linear":[105],"programming.":[106],"The":[107],"generated":[108],"then":[111],"augment":[115],"data":[118],"with":[119],"these":[120],"demonstrate":[124],"gains":[126],"obtained":[127],"PEARL":[129],"when":[130],"compared":[131],"standard":[133],"Proximal":[134],"Policy":[135],"Optimization":[136],"Monte-Carlo":[138],"Tree":[139],"Search":[140],"baselines":[141],"board":[143],"games":[144],"as":[146],"Go":[147],"Checkers.":[149]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
