{"id":"https://openalex.org/W4410552480","doi":"https://doi.org/10.1145/3736424","title":"Can I Trust You?\u2014Handling Unreliable Human Action Advice in Interactive Reinforcement Learning","display_name":"Can I Trust You?\u2014Handling Unreliable Human Action Advice in Interactive Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-21","ids":{"openalex":"https://openalex.org/W4410552480","doi":"https://doi.org/10.1145/3736424"},"language":"en","primary_location":{"id":"doi:10.1145/3736424","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736424","pdf_url":null,"source":{"id":"https://openalex.org/S4210193251","display_name":"ACM Transactions on Human-Robot Interaction","issn_l":"2573-9522","issn":["2573-9522"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Human-Robot Interaction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082442455","display_name":"Lisa Scherf","orcid":"https://orcid.org/0009-0004-0950-5184"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lisa Kempf","raw_affiliation_strings":["Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","Technische Universit\u00e4t Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0009-0004-0950-5184","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117618709","display_name":"Christian Maurer","orcid":null},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Maurer","raw_affiliation_strings":["Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","Technische Universit\u00e4t Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0009-0000-8104-9049","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014107463","display_name":"Cigdem Turan","orcid":"https://orcid.org/0000-0002-4836-6023"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cigdem Turan-Schwiewager","raw_affiliation_strings":["Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","Technische Universit\u00e4t Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0002-4836-6023","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063353075","display_name":"Dorothea Koert","orcid":"https://orcid.org/0000-0002-3571-6848"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dorothea Koert","raw_affiliation_strings":["Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","Technische Universit\u00e4t Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3571-6848","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]},{"raw_affiliation_string":"Technische Universit\u00e4t Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082442455"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04890921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":"4","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.9065999984741211,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.7436941862106323},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.683922290802002},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6752526164054871},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5652973651885986},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.39422762393951416},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.37710779905319214},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.26219120621681213},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21042808890342712}],"concepts":[{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.7436941862106323},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.683922290802002},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6752526164054871},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5652973651885986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.39422762393951416},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.37710779905319214},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.26219120621681213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21042808890342712},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3736424","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736424","pdf_url":null,"source":{"id":"https://openalex.org/S4210193251","display_name":"ACM Transactions on Human-Robot Interaction","issn_l":"2573-9522","issn":["2573-9522"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Human-Robot Interaction","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W121023703","https://openalex.org/W1589064538","https://openalex.org/W1655830068","https://openalex.org/W1996626416","https://openalex.org/W1997618609","https://openalex.org/W2032496699","https://openalex.org/W2044865707","https://openalex.org/W2074056782","https://openalex.org/W2098441518","https://openalex.org/W2107411295","https://openalex.org/W2116157560","https://openalex.org/W2129659607","https://openalex.org/W2275119517","https://openalex.org/W2294422333","https://openalex.org/W2345995343","https://openalex.org/W2477633158","https://openalex.org/W2539402368","https://openalex.org/W2594002025","https://openalex.org/W2775677125","https://openalex.org/W2783564726","https://openalex.org/W2789322388","https://openalex.org/W2837981523","https://openalex.org/W2944766483","https://openalex.org/W2957311632","https://openalex.org/W3039116038","https://openalex.org/W3090894178","https://openalex.org/W3135497281","https://openalex.org/W3171912333","https://openalex.org/W4214717370","https://openalex.org/W4233216703","https://openalex.org/W4285231577","https://openalex.org/W4288080223","https://openalex.org/W4313563086","https://openalex.org/W6635327058"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4393601209","https://openalex.org/W4404663548","https://openalex.org/W3090906284","https://openalex.org/W253876680","https://openalex.org/W4393803066","https://openalex.org/W1987931999","https://openalex.org/W4310083477","https://openalex.org/W2328553770"],"abstract_inverted_index":{"Interactive":[0],"Reinforcement":[1,23],"Learning":[2],"(IntRL)":[3],"with":[4,142,156],"human":[5,45,60,92,107,118,170],"advice":[6,32,36,46,61,84,93,144,160,190],"has":[7],"shown":[8],"great":[9],"potential":[10],"for":[11,40,90,176],"human-guided":[12],"self-improvement":[13],"of":[14,44,106,111,167,196],"robots":[15],"and":[16,94,115,159],"can":[17,52],"accelerate":[18],"learning":[19],"compared":[20,191],"to":[21,54,103,188,192],"traditional":[22],"Learning.":[24],"However,":[25],"most":[26],"existing":[27],"approaches":[28],"assume":[29],"perfectly":[30],"correct":[31],"or":[33],"partially":[34],"incorrect":[35,189],"is":[37,185],"only":[38],"accounted":[39],"by":[41,85],"assessing":[42],"trustworthiness":[43,105],"equally":[47],"across":[48],"all":[49],"states.":[50],"This":[51],"lead":[53],"problems":[55],"in":[56,65,71,130,140,198],"practical":[57],"scenarios,":[58],"where":[59],"might":[62],"be":[63],"inaccurate":[64],"some":[66],"states":[67],"but":[68],"still":[69],"useful":[70],"others.":[72],"We":[73,99],"propose":[74],"a":[75,87,116,151,193],"novel":[76],"IntRL":[77],"algorithm":[78],"that":[79,146,182],"handles":[80],"state-dependent":[81,128],"unreliable":[82,177],"action":[83],"computing":[86],"trust":[88,129,197],"estimate":[89],"both":[91],"the":[95,127,131,165,168,199],"robot\u2019s":[96,132],"own":[97],"policy.":[98,200],"use":[100],"three":[101],"indicators":[102],"assess":[104],"advice,":[108,112],"namely":[109],"consistency":[110],"retrospective":[113],"optimality,":[114],"multi-modal":[117],"uncertainty":[119,158,171],"classifier":[120],"based":[121],"on":[122],"behavioral":[123],"cues.":[124],"For":[125],"estimating":[126],"policy,":[133],"we":[134,180],"compare":[135],"five":[136],"different":[137],"methods.":[138],"Evaluations":[139],"gridworlds":[141],"simulated":[143],"show":[145,181],"our":[147,183],"approach":[148,184],"significantly":[149],"outperforms":[150],"state-independent":[152,194],"baseline.":[153],"Robotic":[154],"experiments":[155],"perceptual":[157],"from":[161],"26":[162],"participants":[163],"confirm":[164],"usefulness":[166],"included":[169],"classification":[172],"as":[173],"an":[174],"indicator":[175],"advice.":[178],"Additionally,":[179],"more":[186],"robust":[187],"computation":[195]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
