{"id":"https://openalex.org/W3145417110","doi":"https://doi.org/10.1109/cdc45484.2021.9683261","title":"Reinforcement Learning Beyond Expectation","display_name":"Reinforcement Learning Beyond Expectation","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W3145417110","doi":"https://doi.org/10.1109/cdc45484.2021.9683261","mag":"3145417110"},"language":"en","primary_location":{"id":"doi:10.1109/cdc45484.2021.9683261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683261","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.00540","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052064870","display_name":"Bhaskar Ramasubramanian","orcid":"https://orcid.org/0000-0002-2166-7838"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bhaskar Ramasubramanian","raw_affiliation_strings":["University of Washington,Network Security Lab,Department of Electrical and Computer Engineering,Seattle,WA,USA,98195"],"affiliations":[{"raw_affiliation_string":"University of Washington,Network Security Lab,Department of Electrical and Computer Engineering,Seattle,WA,USA,98195","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018806127","display_name":"Luyao Niu","orcid":"https://orcid.org/0000-0001-8591-5522"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luyao Niu","raw_affiliation_strings":["Worcester Polytechnic Institute,Department of Electrical and Computer Engineering,Worcester,MA,USA,01609"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute,Department of Electrical and Computer Engineering,Worcester,MA,USA,01609","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004774385","display_name":"Andrew Clark","orcid":"https://orcid.org/0000-0002-5868-6186"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Clark","raw_affiliation_strings":["Worcester Polytechnic Institute,Department of Electrical and Computer Engineering,Worcester,MA,USA,01609"],"affiliations":[{"raw_affiliation_string":"Worcester Polytechnic Institute,Department of Electrical and Computer Engineering,Worcester,MA,USA,01609","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079723268","display_name":"Radha Poovendran","orcid":"https://orcid.org/0000-0003-0269-8097"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Radha Poovendran","raw_affiliation_strings":["University of Washington,Network Security Lab,Department of Electrical and Computer Engineering,Seattle,WA,USA,98195"],"affiliations":[{"raw_affiliation_string":"University of Washington,Network Security Lab,Department of Electrical and Computer Engineering,Seattle,WA,USA,98195","institution_ids":["https://openalex.org/I201448701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5052064870"],"corresponding_institution_ids":["https://openalex.org/I201448701"],"apc_list":null,"apc_paid":null,"fwci":0.127,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.30882217,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1528","last_page":"1535"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9746999740600586,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9574000239372253,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8579413890838623},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7380458116531372},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6008168458938599},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5309302806854248},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5162941813468933},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4797976315021515},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.46295151114463806},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4468223452568054},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4149933159351349},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3996725082397461},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35583916306495667},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08243191242218018}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8579413890838623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7380458116531372},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6008168458938599},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5309302806854248},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5162941813468933},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4797976315021515},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.46295151114463806},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4468223452568054},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4149933159351349},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3996725082397461},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35583916306495667},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08243191242218018},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/cdc45484.2021.9683261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc45484.2021.9683261","pdf_url":null,"source":{"id":"https://openalex.org/S4363607724","display_name":"2021 60th IEEE Conference on Decision and Control (CDC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 60th IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.00540","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.00540","pdf_url":"https://arxiv.org/pdf/2104.00540","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3145417110","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.00540","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.00540","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.00540","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.00540","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.00540","pdf_url":"https://arxiv.org/pdf/2104.00540","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3145417110.pdf","grobid_xml":"https://content.openalex.org/works/W3145417110.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W617973407","https://openalex.org/W1589395967","https://openalex.org/W1825869920","https://openalex.org/W2038398071","https://openalex.org/W2041946752","https://openalex.org/W2056158656","https://openalex.org/W2071983464","https://openalex.org/W2088413745","https://openalex.org/W2099161251","https://openalex.org/W2100752967","https://openalex.org/W2107431923","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2133469585","https://openalex.org/W2139914196","https://openalex.org/W2145339207","https://openalex.org/W2162849300","https://openalex.org/W2165131254","https://openalex.org/W2169206416","https://openalex.org/W2221524898","https://openalex.org/W2257979135","https://openalex.org/W2411577903","https://openalex.org/W2796289712","https://openalex.org/W2887536917","https://openalex.org/W2896408693","https://openalex.org/W2901621510","https://openalex.org/W2909906617","https://openalex.org/W2962766509","https://openalex.org/W2962883549","https://openalex.org/W2962951833","https://openalex.org/W2964108826","https://openalex.org/W3011865677","https://openalex.org/W3021487874","https://openalex.org/W3122620151","https://openalex.org/W3124407081","https://openalex.org/W3125893104","https://openalex.org/W3148350802","https://openalex.org/W3157075202","https://openalex.org/W3157409643","https://openalex.org/W6664367681","https://openalex.org/W6752725515","https://openalex.org/W6755819691","https://openalex.org/W6765780914","https://openalex.org/W6807091881"],"related_works":["https://openalex.org/W2322168405","https://openalex.org/W2765222421","https://openalex.org/W3201003870","https://openalex.org/W2292969843","https://openalex.org/W2969966185","https://openalex.org/W2788212683","https://openalex.org/W2556285270","https://openalex.org/W2979363950","https://openalex.org/W1625755538","https://openalex.org/W2825725460","https://openalex.org/W2940957092","https://openalex.org/W2617918403","https://openalex.org/W2261683202","https://openalex.org/W1562694074","https://openalex.org/W2533925791","https://openalex.org/W2973186106","https://openalex.org/W3198127929","https://openalex.org/W3147894289","https://openalex.org/W1496262375","https://openalex.org/W3184424952"],"abstract_inverted_index":{"The":[0],"inputs":[1],"and":[2,65,142,170,215],"preferences":[3,37],"of":[4,32,38,60,120,132,135,146,191,203],"human":[5,42,121,205],"users":[6,14],"are":[7,93,198],"important":[8],"considerations":[9],"in":[10,29,83,139,174,211],"situations":[11],"where":[12,75,176],"these":[13,23,91,172,196],"interact":[15],"with":[16,35,98,112,201],"autonomous":[17,77],"cyber":[18],"or":[19,40],"cyber-physical":[20],"systems.":[21],"In":[22,68,87,106],"scenarios,":[24],"one":[25,39],"is":[26,48,216],"often":[27],"interested":[28],"aligning":[30],"behaviors":[31,82,92,190],"the":[33,36,99,110,113,118,130,133,144,168,192,212],"system":[34],"more":[41],"users.":[43],"Cumulative":[44],"prospect":[45],"theory":[46],"(CPT)":[47],"a":[49,58,73,125,140,177,204,220],"paradigm":[50],"that":[51,189,202,222],"has":[52,79,180],"been":[53],"empirically":[54],"shown":[55],"to":[56,62,80,108,115,152,160,163,166,181],"model":[57],"tendency":[59],"humans":[61],"view":[63],"gains":[64],"losses":[66],"differently.":[67],"this":[69,154],"paper,":[70],"we":[71,123],"consider":[72],"setting":[74],"an":[76,84,103,136,147,224],"agent":[78,111,193],"learn":[81,164],"unknown":[85],"environment.":[86],"traditional":[88],"reinforcement":[89],"learning,":[90],"learned":[94,194],"through":[95],"repeated":[96],"interactions":[97],"environment":[100],"by":[101],"optimizing":[102],"expected":[104,225],"utility.":[105,226],"order":[107],"endow":[109],"ability":[114],"closely":[116],"mimic":[117],"behavior":[119],"users,":[122],"optimize":[124,167],"CPT-based":[126],"cost.":[127],"We":[128,156,187],"introduce":[129],"notion":[131],"CPT-value":[134],"action":[137],"taken":[138],"state,":[141],"establish":[143],"convergence":[145],"iterative":[148],"dynamic":[149],"programming-based":[150],"approach":[151],"estimate":[153],"quantity.":[155],"develop":[157],"two":[158],"algorithms":[159,173,197],"enable":[161],"agents":[162],"policies":[165],"CPT-value,":[169],"evaluate":[171],"environments":[175],"target":[178],"state":[179],"be":[182,209],"reached":[183],"while":[184],"avoiding":[185],"obstacles.":[186],"demonstrate":[188],"using":[195],"better":[199],"aligned":[200],"user":[206],"who":[207],"might":[208],"placed":[210],"same":[213],"environment,":[214],"significantly":[217],"improved":[218],"over":[219],"baseline":[221],"optimizes":[223]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
