{"id":"https://openalex.org/W4313563086","doi":"https://doi.org/10.1109/humanoids53995.2022.10000078","title":"Learning from Unreliable Human Action Advice in Interactive Reinforcement Learning","display_name":"Learning from Unreliable Human Action Advice in Interactive Reinforcement Learning","publication_year":2022,"publication_date":"2022-11-28","ids":{"openalex":"https://openalex.org/W4313563086","doi":"https://doi.org/10.1109/humanoids53995.2022.10000078"},"language":"en","primary_location":{"id":"doi:10.1109/humanoids53995.2022.10000078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids53995.2022.10000078","pdf_url":null,"source":{"id":"https://openalex.org/S4363608580","display_name":"2022 IEEE-RAS 21st International Conference on Humanoid Robots (Humanoids)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE-RAS 21st International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086915419","display_name":"Lisa Scherf","orcid":null},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lisa Scherf","raw_affiliation_strings":["Interactive AI &#x0026; Cognitive Models for AI interaction","Centre for Cognitive Science, TU Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Interactive AI &#x0026; Cognitive Models for AI interaction","institution_ids":[]},{"raw_affiliation_string":"Centre for Cognitive Science, TU Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014107463","display_name":"Cigdem Turan","orcid":"https://orcid.org/0000-0002-4836-6023"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cigdem Turan","raw_affiliation_strings":["Interactive AI &#x0026; Cognitive Models for AI interaction","Centre for Cognitive Science, TU Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Interactive AI &#x0026; Cognitive Models for AI interaction","institution_ids":[]},{"raw_affiliation_string":"Centre for Cognitive Science, TU Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063353075","display_name":"Dorothea Koert","orcid":"https://orcid.org/0000-0002-3571-6848"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dorothea Koert","raw_affiliation_strings":["Interactive AI &#x0026; Cognitive Models for AI interaction","Centre for Cognitive Science, TU Darmstadt, Germany"],"affiliations":[{"raw_affiliation_string":"Interactive AI &#x0026; Cognitive Models for AI interaction","institution_ids":[]},{"raw_affiliation_string":"Centre for Cognitive Science, TU Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086915419"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":null,"apc_paid":null,"fwci":0.4158,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.60019392,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"895","last_page":"902"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.8799507021903992},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7585254907608032},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7393055558204651},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.7168142795562744},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6161613464355469},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5690532326698303},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44975584745407104},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39447587728500366}],"concepts":[{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.8799507021903992},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7585254907608032},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7393055558204651},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.7168142795562744},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6161613464355469},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5690532326698303},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44975584745407104},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39447587728500366},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/humanoids53995.2022.10000078","is_oa":false,"landing_page_url":"https://doi.org/10.1109/humanoids53995.2022.10000078","pdf_url":null,"source":{"id":"https://openalex.org/S4363608580","display_name":"2022 IEEE-RAS 21st International Conference on Humanoid Robots (Humanoids)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE-RAS 21st International Conference on Humanoid Robots (Humanoids)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8647972370","display_name":null,"funder_award_id":"01IS20045","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W121023703","https://openalex.org/W197704362","https://openalex.org/W1588539311","https://openalex.org/W1655830068","https://openalex.org/W1996626416","https://openalex.org/W1997618609","https://openalex.org/W2032496699","https://openalex.org/W2043462999","https://openalex.org/W2044865707","https://openalex.org/W2074056782","https://openalex.org/W2098441518","https://openalex.org/W2110064869","https://openalex.org/W2116157560","https://openalex.org/W2121863487","https://openalex.org/W2275119517","https://openalex.org/W2294422333","https://openalex.org/W2477633158","https://openalex.org/W2513140567","https://openalex.org/W2574075402","https://openalex.org/W2740302738","https://openalex.org/W2754794180","https://openalex.org/W2775677125","https://openalex.org/W2789322388","https://openalex.org/W2807366678","https://openalex.org/W2837981523","https://openalex.org/W2898227854","https://openalex.org/W2914170332","https://openalex.org/W2944766483","https://openalex.org/W2957311632","https://openalex.org/W2973093144","https://openalex.org/W3033075553","https://openalex.org/W3039116038","https://openalex.org/W3090894178","https://openalex.org/W3091272913","https://openalex.org/W3135497281","https://openalex.org/W3171912333","https://openalex.org/W3175395689","https://openalex.org/W4214717370","https://openalex.org/W4233216703","https://openalex.org/W6604963999","https://openalex.org/W6607978175","https://openalex.org/W6674812091","https://openalex.org/W6677262939","https://openalex.org/W6696972564","https://openalex.org/W6731856356","https://openalex.org/W6744338129","https://openalex.org/W6755473067","https://openalex.org/W6768100995","https://openalex.org/W6779596023"],"related_works":["https://openalex.org/W4393601209","https://openalex.org/W3090906284","https://openalex.org/W253876680","https://openalex.org/W4393803066","https://openalex.org/W1987931999","https://openalex.org/W4293797372","https://openalex.org/W4238052600","https://openalex.org/W4254639292","https://openalex.org/W3005176110","https://openalex.org/W2358024260"],"abstract_inverted_index":{"Interactive":[0],"Reinforcement":[1],"Learning":[2],"(IRL)":[3],"uses":[4],"human":[5,30,45,59,75,104,133],"input":[6,26,76],"to":[7,69,167],"improve":[8],"learning":[9,13],"speed":[10],"and":[11,127,143,160],"enable":[12],"in":[14,58,71,81,89,107,139,144],"more":[15],"complex":[16],"environments.":[17],"Human":[18],"action":[19,46,60,105],"advice":[20,55,61,106],"is":[21,86],"here":[22],"one":[23],"of":[24,43,63,121,125,163],"the":[25,41],"channels":[27],"preferred":[28],"by":[29],"users.":[31],"However,":[32],"many":[33],"existing":[34],"IRL":[35],"approaches":[36,50],"do":[37],"not":[38],"explicitly":[39],"consider":[40],"possibility":[42],"inaccurate":[44,54,79],"advice.":[47,169],"Moreover,":[48],"most":[49],"that":[51,99,130,153],"account":[52],"for":[53,116],"compute":[56],"trust":[57],"independent":[62],"a":[64,96,140,157],"state.":[65],"This":[66],"can":[67,100],"lead":[68],"problems":[70],"practical":[72],"cases,":[73],"where":[74],"might":[77],"be":[78],"only":[80],"some":[82],"states":[83],"while":[84],"it":[85],"still":[87],"useful":[88],"others.":[90],"To":[91],"this":[92],"end,":[93],"we":[94,110],"propose":[95],"novel":[97],"algorithm":[98],"handle":[101],"state-dependent":[102],"unreliable":[103,117,168],"IRL.":[108],"Here,":[109],"combine":[111],"three":[112],"potential":[113],"indicator":[114],"signals":[115],"advice,":[118,122,126],"i.e.":[119],"consistency":[120],"retrospective":[123],"optimality":[124],"behavioral":[128,164],"cues":[129,165],"hint":[131],"at":[132],"uncertainty.":[134],"We":[135,151],"evaluate":[136],"our":[137,154],"method":[138,155],"simulated":[141],"gridworld":[142],"robotic":[145],"sorting":[146],"tasks":[147],"with":[148],"28":[149],"subjects.":[150],"show":[152],"outperforms":[156],"state-independent":[158],"baseline":[159],"analyze":[161],"occurrences":[162],"related":[166]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
