{"id":"https://openalex.org/W4382239386","doi":"https://doi.org/10.1609/aaai.v37i5.25733","title":"The Perils of Trial-and-Error Reward Design: Misdesign through Overfitting and Invalid Task Specifications","display_name":"The Perils of Trial-and-Error Reward Design: Misdesign through Overfitting and Invalid Task Specifications","publication_year":2023,"publication_date":"2023-06-26","ids":{"openalex":"https://openalex.org/W4382239386","doi":"https://doi.org/10.1609/aaai.v37i5.25733"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v37i5.25733","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v37i5.25733","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/25733/25505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/25733/25505","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084731369","display_name":"Serena Booth","orcid":"https://orcid.org/0000-0001-7738-4418"},"institutions":[{"id":"https://openalex.org/I4210120115","display_name":"Robert Bosch (United States)","ror":"https://ror.org/02venad53","country_code":"US","type":"company","lineage":["https://openalex.org/I4210120115","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Serena Booth","raw_affiliation_strings":["Bosch\nThe University of Texas at Austin\nMIT CSAIL"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bosch\nThe University of Texas at Austin\nMIT CSAIL","institution_ids":["https://openalex.org/I86519309","https://openalex.org/I4210120115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056649746","display_name":"W. Bradley Knox","orcid":"https://orcid.org/0000-0002-6006-9523"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210120115","display_name":"Robert Bosch (United States)","ror":"https://ror.org/02venad53","country_code":"US","type":"company","lineage":["https://openalex.org/I4210120115","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. Bradley Knox","raw_affiliation_strings":["Bosch\nThe University of Texas at Austin\nGoogle Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bosch\nThe University of Texas at Austin\nGoogle Research","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I86519309","https://openalex.org/I4210120115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044369720","display_name":"Julie Shah","orcid":"https://orcid.org/0000-0003-1338-8107"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julie Shah","raw_affiliation_strings":["MIT CSAIL"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT CSAIL","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043572737","display_name":"Scott Niekum","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Niekum","raw_affiliation_strings":["The University of Texas at Austin\nThe University of Massachusetts at Amherst"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin\nThe University of Massachusetts at Amherst","institution_ids":["https://openalex.org/I24603500","https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001594330","display_name":"Peter Stone","orcid":"https://orcid.org/0000-0002-6795-420X"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Stone","raw_affiliation_strings":["The University of Texas at Austin\nSony AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin\nSony AI","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059194708","display_name":"Alessandro Allievi","orcid":"https://orcid.org/0000-0001-5793-7679"},"institutions":[{"id":"https://openalex.org/I4210120115","display_name":"Robert Bosch (United States)","ror":"https://ror.org/02venad53","country_code":"US","type":"company","lineage":["https://openalex.org/I4210120115","https://openalex.org/I889804353"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alessandro Allievi","raw_affiliation_strings":["Bosch\nThe University of Texas at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bosch\nThe University of Texas at Austin","institution_ids":["https://openalex.org/I86519309","https://openalex.org/I4210120115"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084731369"],"corresponding_institution_ids":["https://openalex.org/I4210120115","https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":24.7531,"has_fulltext":true,"cited_by_count":43,"citation_normalized_percentile":{"value":0.99641148,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"37","issue":"5","first_page":"5920","last_page":"5929"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9379000067710876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.9403963685035706},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7620341181755066},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6956759095191956},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6698585152626038},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6243281960487366},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6235438585281372},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5911116003990173},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5575098395347595},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46228861808776855},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.45690858364105225},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.4112311601638794},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1612103283405304}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.9403963685035706},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7620341181755066},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6956759095191956},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6698585152626038},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6243281960487366},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6235438585281372},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5911116003990173},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5575098395347595},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46228861808776855},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.45690858364105225},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.4112311601638794},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1612103283405304},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v37i5.25733","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v37i5.25733","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/25733/25505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v37i5.25733","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v37i5.25733","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/25733/25505","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4382239386.pdf"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1453801241","https://openalex.org/W1777239053","https://openalex.org/W1979290264","https://openalex.org/W2098774185","https://openalex.org/W2103012681","https://openalex.org/W2115008305","https://openalex.org/W2123408238","https://openalex.org/W2145339207","https://openalex.org/W2260756217","https://openalex.org/W2386192529","https://openalex.org/W2462906003","https://openalex.org/W2580300496","https://openalex.org/W2624731731","https://openalex.org/W2626804490","https://openalex.org/W2736601468","https://openalex.org/W2754517384","https://openalex.org/W2804948070","https://openalex.org/W2912063360","https://openalex.org/W2945796529","https://openalex.org/W2950892788","https://openalex.org/W2962889474","https://openalex.org/W2964043796","https://openalex.org/W2981344907","https://openalex.org/W2990747716","https://openalex.org/W2995894173","https://openalex.org/W3034769194","https://openalex.org/W3102583453","https://openalex.org/W3126321819","https://openalex.org/W3157893055","https://openalex.org/W3203757507","https://openalex.org/W3207649131","https://openalex.org/W4221141793","https://openalex.org/W4235688125","https://openalex.org/W4281963856","https://openalex.org/W4300110528","https://openalex.org/W4307106652","https://openalex.org/W4320449037","https://openalex.org/W4321392130","https://openalex.org/W6601295022","https://openalex.org/W6645210854","https://openalex.org/W6677916085","https://openalex.org/W6678307873","https://openalex.org/W6743368274","https://openalex.org/W6744123322","https://openalex.org/W6758326461","https://openalex.org/W6794667414","https://openalex.org/W6796301005"],"related_works":["https://openalex.org/W4298369531","https://openalex.org/W3155135229","https://openalex.org/W4281847915","https://openalex.org/W4361804730","https://openalex.org/W2142113611","https://openalex.org/W2334467465","https://openalex.org/W2018387840","https://openalex.org/W2087870008","https://openalex.org/W3172351367","https://openalex.org/W2162534555"],"abstract_inverted_index":{"In":[0,81,137],"reinforcement":[1],"learning":[2,168],"(RL),":[3],"a":[4,11,22,28,88,134,176,210],"reward":[5,29,61,65,89,118,128,152,162,187,196,207,233],"function":[6,90,119,129,197],"that":[7,91,159,161,202],"aligns":[8],"exactly":[9],"with":[10,58,94],"task's":[12],"true":[13,23,41],"performance":[14,93],"metric":[15,25,99],"is":[16,120],"often":[17,56],"necessarily":[18],"sparse.":[19],"For":[20],"example,":[21],"task":[24,42,98,226],"might":[26],"encode":[27],"of":[30,39,77,114,144,150,186,195,218],"1":[31],"upon":[32],"success":[33],"and":[34,79,170,213],"0":[35],"otherwise.":[36],"The":[37],"sparsity":[38],"these":[40],"metrics":[43],"can":[44,130,164],"make":[45],"them":[46],"hard":[47],"to":[48,96,106,133,167,206,222],"learn":[49,107],"from,":[50],"so":[51],"in":[52,189],"practice":[53,149],"they":[54],"are":[55,67],"replaced":[57],"alternative":[59],"dense":[60,64],"functions.":[62],"These":[63],"functions":[66,163],"typically":[68],"designed":[69],"by":[70],"experts":[71,84],"through":[72,224],"an":[73,103,242],"ad":[74],"hoc":[75],"process":[76,110],"trial":[78],"error.":[80],"this":[82,138,145],"process,":[83],"manually":[85],"search":[86],"for":[87,122,237],"improves":[92],"respect":[95],"the":[97,112,116,127,142,215],"while":[100],"also":[101,200],"enabling":[102],"RL":[104,229],"algorithm":[105],"faster.":[108],"This":[109],"raises":[111],"question":[113],"whether":[115,126],"same":[117],"optimal":[121],"all":[123],"algorithms,":[124],"i.e.,":[125],"be":[131,165],"overfit":[132,166],"particular":[135],"algorithm.":[136],"paper,":[139],"we":[140,191],"study":[141,179],"consequences":[143],"wide":[146],"yet":[147],"unexamined":[148],"trial-and-error":[151],"design.":[153],"We":[154,173,199],"first":[155],"conduct":[156,175],"computational":[157],"experiments":[158],"confirm":[160],"algorithms":[169,230],"their":[171],"hyperparameters.":[172],"then":[174],"controlled":[177],"observation":[178],"which":[180,190],"emulates":[181],"expert":[182],"practitioners'":[183],"typical":[184,204],"experiences":[185],"design,":[188],"similarly":[192],"find":[193,201],"evidence":[194],"overfitting.":[198],"experts'":[203],"approach":[205],"design---of":[208],"adopting":[209],"myopic":[211],"strategy":[212],"weighing":[214],"relative":[216],"goodness":[217],"each":[219],"state-action":[220,239],"pair---leads":[221],"misdesign":[223],"invalid":[225],"specifications,":[227],"since":[228],"use":[231],"cumulative":[232],"rather":[234],"than":[235],"rewards":[236],"individual":[238],"pairs":[240],"as":[241],"optimization":[243],"target.":[244],"Code,":[245],"data:":[246],"github.com/serenabooth/reward-design-perils":[247]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
