{"id":"https://openalex.org/W3162164975","doi":"https://doi.org/10.1109/tac.2023.3240925","title":"Learning to Act Safely With Limited Exposure and Almost Sure Certainty","display_name":"Learning to Act Safely With Limited Exposure and Almost Sure Certainty","publication_year":2023,"publication_date":"2023-01-31","ids":{"openalex":"https://openalex.org/W3162164975","doi":"https://doi.org/10.1109/tac.2023.3240925","mag":"3162164975"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2023.3240925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2023.3240925","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018563588","display_name":"Agustin Castellano","orcid":"https://orcid.org/0000-0003-3491-5746"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Agustin Castellano","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA"],"raw_orcid":"https://orcid.org/0000-0003-3491-5746","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038819585","display_name":"Hancheng Min","orcid":"https://orcid.org/0000-0003-0600-3854"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hancheng Min","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA"],"raw_orcid":"https://orcid.org/0000-0003-0600-3854","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029019906","display_name":"Juan Andr\u00e9s Bazerque","orcid":"https://orcid.org/0000-0001-9950-1208"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juan Andres Bazerque","raw_affiliation_strings":["Department of Electrical Engineering, University of Pittsburgh, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0001-9950-1208","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032910355","display_name":"Enrique Mallada","orcid":"https://orcid.org/0000-0003-1568-1833"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Enrique Mallada","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA"],"raw_orcid":"https://orcid.org/0000-0003-1568-1833","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4161,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.81801517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"68","issue":"5","first_page":"2979","last_page":"2994"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7466861009597778},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6231963634490967},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5718147158622742},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.5369678735733032},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.503229558467865},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.49400192499160767},{"id":"https://openalex.org/keywords/finite-set","display_name":"Finite set","score":0.49075445532798767},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4430108666419983},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.41030603647232056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2754971385002136},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21159988641738892}],"concepts":[{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7466861009597778},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6231963634490967},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5718147158622742},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.5369678735733032},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.503229558467865},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.49400192499160767},{"id":"https://openalex.org/C162392398","wikidata":"https://www.wikidata.org/wiki/Q272404","display_name":"Finite set","level":2,"score":0.49075445532798767},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4430108666419983},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.41030603647232056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2754971385002136},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21159988641738892},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2023.3240925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2023.3240925","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7200000286102295,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":89,"referenced_works":["https://openalex.org/W51049863","https://openalex.org/W645921674","https://openalex.org/W1506085041","https://openalex.org/W1542232783","https://openalex.org/W1582706191","https://openalex.org/W1588998206","https://openalex.org/W1845972764","https://openalex.org/W1994616650","https://openalex.org/W2009551863","https://openalex.org/W2013614847","https://openalex.org/W2027579135","https://openalex.org/W2035490465","https://openalex.org/W2098432798","https://openalex.org/W2105078254","https://openalex.org/W2108114251","https://openalex.org/W2121863487","https://openalex.org/W2143346970","https://openalex.org/W2147750403","https://openalex.org/W2149166950","https://openalex.org/W2160815625","https://openalex.org/W2164479831","https://openalex.org/W2167770337","https://openalex.org/W2169106666","https://openalex.org/W2222789563","https://openalex.org/W2257979135","https://openalex.org/W2562316362","https://openalex.org/W2618318883","https://openalex.org/W2622826443","https://openalex.org/W2737906791","https://openalex.org/W2749807327","https://openalex.org/W2751801093","https://openalex.org/W2784465508","https://openalex.org/W2788084076","https://openalex.org/W2798588334","https://openalex.org/W2897661175","https://openalex.org/W2898291697","https://openalex.org/W2914316834","https://openalex.org/W2914506601","https://openalex.org/W2914702425","https://openalex.org/W2963958573","https://openalex.org/W2964080237","https://openalex.org/W2964340170","https://openalex.org/W2966956172","https://openalex.org/W2968945909","https://openalex.org/W2985692297","https://openalex.org/W2989800586","https://openalex.org/W2991391803","https://openalex.org/W3011120880","https://openalex.org/W3011250830","https://openalex.org/W3026873144","https://openalex.org/W3034840734","https://openalex.org/W3046384803","https://openalex.org/W3048735518","https://openalex.org/W3082066778","https://openalex.org/W3090050536","https://openalex.org/W3098983831","https://openalex.org/W3101517963","https://openalex.org/W3102674085","https://openalex.org/W3110408272","https://openalex.org/W3126521706","https://openalex.org/W3128962298","https://openalex.org/W3159659315","https://openalex.org/W3162164975","https://openalex.org/W3176971532","https://openalex.org/W3183819859","https://openalex.org/W4206530644","https://openalex.org/W4214717370","https://openalex.org/W4230704549","https://openalex.org/W4233696721","https://openalex.org/W4294555834","https://openalex.org/W4300723704","https://openalex.org/W4301018745","https://openalex.org/W6639175102","https://openalex.org/W6676576766","https://openalex.org/W6684037837","https://openalex.org/W6731039252","https://openalex.org/W6738483526","https://openalex.org/W6747790125","https://openalex.org/W6751051826","https://openalex.org/W6766502378","https://openalex.org/W6772100842","https://openalex.org/W6774406872","https://openalex.org/W6779771351","https://openalex.org/W6779812412","https://openalex.org/W6784033082","https://openalex.org/W6785471904","https://openalex.org/W6785819423","https://openalex.org/W6811206064","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W1985560493","https://openalex.org/W2386410636","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W2808418668","https://openalex.org/W3096874164","https://openalex.org/W2357975469","https://openalex.org/W2937181779","https://openalex.org/W2145363145","https://openalex.org/W3168977894"],"abstract_inverted_index":{"This":[0,33],"article":[1],"puts":[2],"forward":[3],"the":[4,24,54,73,80,87,121,128,131,140,159,169,176,207,211,220,234,245],"concept":[5,64],"that":[6,38,101,158,166,187,239],"learning":[7,84,246],"to":[8,42,50,126,215,224],"take":[9],"safe":[10,135],"actions":[11],"in":[12,65,86,107,193],"unknown":[13],"environments,":[14],"even":[15],"with":[16,152],"probability":[17,132],"one":[18,39],"guarantees,":[19],"can":[20,242],"be":[21],"achieved":[22],"without":[23],"need":[25],"for":[26,146,168,210],"an":[27,99,108],"unbounded":[28],"number":[29,111,122,197],"of":[30,48,58,83,89,112,123,133,142,171,175,198,222],"exploratory":[31],"trials.":[32],"is":[34,40],"indeed":[35],"possible,":[36],"provided":[37],"willing":[41],"navigate":[43],"tradeoffs":[44,82],"between":[45,120,206],"optimality,":[46],"level":[47,221],"exposure":[49,223],"unsafe":[51,59,105,190,217,225],"events,":[52],"and":[53,78,130,219,237],"maximum":[55],"detection":[56],"time":[57,208],"actions.":[60],"We":[61,69,137,156],"illustrate":[62],"this":[63,180],"two":[66],"complementary":[67],"settings.":[68],"first":[70],"focus":[71],"on":[72,94],"canonical":[74],"multiarmed":[75],"bandit":[76],"problem":[77,141],"study":[79],"intrinsic":[81],"safety":[85,240],"presence":[88],"uncertainty.":[90],"Under":[91],"mild":[92],"assumptions":[93],"sufficient":[95],"exploration,":[96],"we":[97,182],"provide":[98],"algorithm":[100],"provably":[102],"detects":[103],"all":[104],"machines":[106],"(expected)":[109],"finite":[110,195],"rounds.":[113],"The":[114],"analysis":[115,201],"also":[116],"unveils":[117],"a":[118,147,163,184,194,204],"tradeoff":[119,205],"rounds":[124],"needed":[125],"secure":[127],"environment":[129],"discarding":[134],"machines.":[136],"then":[138],"consider":[139],"finding":[143],"optimal":[144],"policies":[145,173],"Markov":[148],"decision":[149],"process":[150],"(MDP)":[151],"almost":[153],"sure":[154],"constraints.":[155],"show":[157],"action-value":[160],"function":[161],"satisfies":[162],"barrier-based":[164],"decomposition":[165],"allows":[167],"identification":[170],"feasible":[172],"independently":[174],"reward":[177],"process.":[178,247],"Using":[179],"decomposition,":[181],"develop":[183],"barrier-learning":[185],"algorithm,":[186],"identifies":[188],"such":[189],"state\u2013action":[191],"pairs":[192],"expected":[196],"steps.":[199],"Our":[200],"further":[202,232],"highlights":[203],"lag":[209],"underlying":[212],"MDP":[213],"necessary":[214],"detect":[216],"actions,":[218],"events.":[226],"Simulations":[227],"corroborate":[228],"our":[229],"theoretical":[230],"findings,":[231],"illustrating":[233],"aforementioned":[235],"tradeoffs,":[236],"suggesting":[238],"constraints":[241],"speed":[243],"up":[244]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
