{"id":"https://openalex.org/W4392341858","doi":"https://doi.org/10.1145/3649885","title":"Batch Active Learning of Reward Functions from Human Preferences","display_name":"Batch Active Learning of Reward Functions from Human Preferences","publication_year":2024,"publication_date":"2024-02-29","ids":{"openalex":"https://openalex.org/W4392341858","doi":"https://doi.org/10.1145/3649885"},"language":"en","primary_location":{"id":"doi:10.1145/3649885","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649885","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649885","source":{"id":"https://openalex.org/S4210193251","display_name":"ACM Transactions on Human-Robot Interaction","issn_l":"2573-9522","issn":["2573-9522"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Human-Robot Interaction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3649885","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031426401","display_name":"Erdem B\u0131y\u0131k","orcid":"https://orcid.org/0000-0002-9516-3130"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Erdem Biyik","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, USA"],"raw_orcid":"https://orcid.org/0000-0002-9516-3130","affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040375566","display_name":"Nima Anari","orcid":"https://orcid.org/0000-0002-4394-3530"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nima Anari","raw_affiliation_strings":["Department of Computer Science, Stanford University, Stanford, USA"],"raw_orcid":"https://orcid.org/0000-0002-4394-3530","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080725225","display_name":"Dorsa Sadigh","orcid":"https://orcid.org/0000-0002-7802-9183"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dorsa Sadigh","raw_affiliation_strings":["Department of Computer Science, Stanford University, Stanford, USA"],"raw_orcid":"https://orcid.org/0000-0002-7802-9183","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031426401"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":2.3179,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.89225228,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"13","issue":"2","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9761000275611877,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.46259966492652893},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.418569952249527},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.4099085032939911},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.39726778864860535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3386988639831543}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.46259966492652893},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.418569952249527},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4099085032939911},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.39726778864860535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3386988639831543}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3649885","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649885","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649885","source":{"id":"https://openalex.org/S4210193251","display_name":"ACM Transactions on Human-Robot Interaction","issn_l":"2573-9522","issn":["2573-9522"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Human-Robot Interaction","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3649885","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649885","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649885","source":{"id":"https://openalex.org/S4210193251","display_name":"ACM Transactions on Human-Robot Interaction","issn_l":"2573-9522","issn":["2573-9522"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Human-Robot Interaction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8977957641","display_name":"Collaborative Research: CPS: Small: Risk-Aware Planning and Control for Safety-Critical Human-CPS","funder_award_id":"2218760","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392341858.pdf","grobid_xml":"https://content.openalex.org/works/W4392341858.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W87092222","https://openalex.org/W122021961","https://openalex.org/W187005120","https://openalex.org/W1583953806","https://openalex.org/W1826690079","https://openalex.org/W1837766154","https://openalex.org/W1983599491","https://openalex.org/W1991814714","https://openalex.org/W1995780830","https://openalex.org/W2008989859","https://openalex.org/W2044442377","https://openalex.org/W2076410399","https://openalex.org/W2098642746","https://openalex.org/W2108687396","https://openalex.org/W2111336742","https://openalex.org/W2122350398","https://openalex.org/W2128483847","https://openalex.org/W2150593711","https://openalex.org/W2152228468","https://openalex.org/W2154023516","https://openalex.org/W2158782408","https://openalex.org/W2187580259","https://openalex.org/W2395738761","https://openalex.org/W2411577903","https://openalex.org/W2508337090","https://openalex.org/W2541743403","https://openalex.org/W2559655401","https://openalex.org/W2559997609","https://openalex.org/W2604272474","https://openalex.org/W2612690371","https://openalex.org/W2735318784","https://openalex.org/W2766802377","https://openalex.org/W2791379569","https://openalex.org/W2793405567","https://openalex.org/W2913210897","https://openalex.org/W2963491601","https://openalex.org/W2963722886","https://openalex.org/W2963877232","https://openalex.org/W2964177756","https://openalex.org/W2964220187","https://openalex.org/W2964269311","https://openalex.org/W2990138404","https://openalex.org/W3004361447","https://openalex.org/W3015044328","https://openalex.org/W3039563104","https://openalex.org/W3090359064","https://openalex.org/W3091627088","https://openalex.org/W3104324249","https://openalex.org/W3129827477","https://openalex.org/W3197594072","https://openalex.org/W4205632023","https://openalex.org/W4212774754","https://openalex.org/W4230167402","https://openalex.org/W4252684946","https://openalex.org/W4288083537","https://openalex.org/W4299828299","https://openalex.org/W4383066631","https://openalex.org/W4383108621","https://openalex.org/W4388458590","https://openalex.org/W6922408002"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Data":[0],"generation":[1,81,99],"and":[2,44,83,100],"labeling":[3,18],"are":[4,28,133],"often":[5],"expensive":[6],"in":[7,31,116,135,147],"robot":[8],"learning.":[9],"Preference-based":[10],"learning":[11,33,60,65,125],"is":[12],"a":[13,52,89,111,129,136,148],"concept":[14],"that":[15,62,121,132],"enables":[16],"reliable":[17],"by":[19],"querying":[20,26],"users":[21],"with":[22],"preference":[23],"questions.":[24],"Active":[25],"methods":[27],"commonly":[29],"employed":[30],"preference-based":[32,59],"to":[34,150],"generate":[35],"more":[36],"informative":[37],"data":[38,72],"at":[39],"the":[40],"expense":[41],"of":[42,54,66,113,139,144],"parallelization":[43],"computation":[45],"time.":[46,140],"In":[47],"this":[48],"article,":[49],"we":[50,105],"develop":[51],"set":[53],"novel":[55],"algorithms,":[56],"batch":[57,98,123],"active":[58,97,124],"methods,":[61],"enable":[63],"efficient":[64],"reward":[67],"functions":[68],"using":[69],"as":[70,74],"few":[71,130],"samples":[73],"possible":[75],"while":[76],"still":[77],"having":[78],"short":[79,137],"query":[80],"times":[82],"also":[84],"retaining":[85],"parallelizability.":[86],"We":[87,141],"introduce":[88],"method":[90],"based":[91],"on":[92],"determinantal":[93],"point":[94],"processes":[95],"for":[96,110],"several":[101],"heuristic-based":[102],"alternatives.":[103],"Finally,":[104],"present":[106],"our":[107,122,145],"experimental":[108],"results":[109,119],"variety":[112],"robotics":[114],"tasks":[115],"simulation.":[117],"Our":[118],"suggest":[120],"algorithm":[126],"requires":[127],"only":[128],"queries":[131],"computed":[134],"amount":[138],"showcase":[142],"one":[143],"algorithms":[146],"study":[149],"learn":[151],"human":[152],"users\u2019":[153],"preferences.":[154]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
