{"id":"https://openalex.org/W4296632059","doi":"https://doi.org/10.3233/faia220189","title":"Learning to Cooperate with Human Evaluative Feedback and Demonstrations","display_name":"Learning to Cooperate with Human Evaluative Feedback and Demonstrations","publication_year":2022,"publication_date":"2022-09-19","ids":{"openalex":"https://openalex.org/W4296632059","doi":"https://doi.org/10.3233/faia220189"},"language":"en","primary_location":{"id":"doi:10.3233/faia220189","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia220189","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA220189","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA220189","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075631283","display_name":"Mehul Verma","orcid":null},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Mehul Verma","raw_affiliation_strings":["Vrije Universiteit Amsterdam, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Vrije Universiteit Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042454545","display_name":"Erman Acar","orcid":"https://orcid.org/0000-0001-7541-2999"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Erman Acar","raw_affiliation_strings":["LIACS, Universiteit Leiden, The Netherlands","Vrije Universiteit Amsterdam, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIACS, Universiteit Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]},{"raw_affiliation_string":"Vrije Universiteit Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25109559,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cornerstone","display_name":"Cornerstone","score":0.6371213793754578},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.630366325378418},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6102504730224609},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5931867957115173},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4721263349056244},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3884626030921936},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3200913369655609}],"concepts":[{"id":"https://openalex.org/C2780616401","wikidata":"https://www.wikidata.org/wiki/Q1133673","display_name":"Cornerstone","level":2,"score":0.6371213793754578},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.630366325378418},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6102504730224609},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5931867957115173},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4721263349056244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3884626030921936},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3200913369655609},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia220189","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia220189","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA220189","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:vu:oai:research.vu.nl:publications/907a6818-332b-42ed-a1b7-dc78c1c0a47e","is_oa":true,"landing_page_url":"https://research.vu.nl/en/publications/907a6818-332b-42ed-a1b7-dc78c1c0a47e","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"HHAI2022: Augmenting Human Intellect: Proceedings of the 1st International Conference on Hybrid Human-Artificial Intelligence, 46 - 59","raw_type":"info:eu-repo/semantics/conferencepaper"}],"best_oa_location":{"id":"doi:10.3233/faia220189","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia220189","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA220189","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4296632059.pdf","grobid_xml":"https://content.openalex.org/works/W4296632059.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W197704362","https://openalex.org/W588976065","https://openalex.org/W1521003796","https://openalex.org/W1565845844","https://openalex.org/W1591675293","https://openalex.org/W1665214252","https://openalex.org/W1667072054","https://openalex.org/W1777239053","https://openalex.org/W2096145798","https://openalex.org/W2118756286","https://openalex.org/W2135358079","https://openalex.org/W2135504505","https://openalex.org/W2136822419","https://openalex.org/W2140584963","https://openalex.org/W2154018708","https://openalex.org/W2172933221","https://openalex.org/W2561664829","https://openalex.org/W2788862220","https://openalex.org/W2897558098","https://openalex.org/W2898227854","https://openalex.org/W2915055038","https://openalex.org/W2921435067","https://openalex.org/W2944766483","https://openalex.org/W2963489214","https://openalex.org/W2963658727","https://openalex.org/W2976398475","https://openalex.org/W2982316857","https://openalex.org/W2996037775","https://openalex.org/W3000002597","https://openalex.org/W3041747184","https://openalex.org/W3046399757","https://openalex.org/W3194049838","https://openalex.org/W4221152344","https://openalex.org/W4232117153","https://openalex.org/W4288593469","https://openalex.org/W4298857966","https://openalex.org/W6633821104","https://openalex.org/W6680365038"],"related_works":["https://openalex.org/W2088111589","https://openalex.org/W2321083758","https://openalex.org/W4366496304","https://openalex.org/W4229004718","https://openalex.org/W2375906709","https://openalex.org/W2350669386","https://openalex.org/W2348913748","https://openalex.org/W2381386433","https://openalex.org/W2045980693","https://openalex.org/W175865013"],"abstract_inverted_index":{"Cooperation":[0],"is":[1,32,164],"a":[2,11,41,84,129,144,189],"widespread":[3],"phenomenon":[4],"in":[5,13,40,62,83],"nature":[6],"that":[7,122,169,203],"has":[8,128],"also":[9,45,172],"been":[10],"cornerstone":[12],"the":[14,47,66,123,133,156,196],"development":[15],"of":[16,36,49,68,125,147,198],"human":[17,42,81,94,205],"intelligence.":[18],"Understanding":[19],"cooperation,":[20],"therefore,":[21],"on":[22,132],"matters":[23],"such":[24],"as":[25,161],"how":[26],"it":[27],"emerges,":[28],"develops,":[29],"or":[30],"fails":[31],"an":[33],"important":[34],"avenue":[35],"research,":[37],"not":[38],"only":[39],"context,":[43],"but":[44],"for":[46],"advancement":[48],"next":[50],"generation":[51],"artificial":[52],"intelligence":[53],"paradigms":[54],"which":[55],"are":[56],"presumably":[57],"human-compatible.":[58],"With":[59],"this":[60,211],"motivation":[61],"mind,":[63],"we":[64,90,120,167,187],"study":[65],"emergence":[67,197],"cooperative":[69,199],"behaviour":[70,200],"between":[71],"two":[72,116],"independent":[73],"deep":[74],"reinforcement":[75],"learning":[76],"(RL)":[77],"agents":[78,108,141,150],"provided":[79],"with":[80,143,177],"input":[82],"novel":[85,190],"game":[86,191],"environment.":[87],"In":[88,185],"particular,":[89],"investigate":[91],"whether":[92],"evaluative":[93,183],"feedback":[95,126,148,163,206],"(through":[96,102],"interactive":[97],"RL)":[98,104],"and":[99,155,201,207],"expert":[100,170],"demonstration":[101,171],"inverse":[103],"can":[105,209],"help":[106],"RL":[107],"to":[109,111,182,193],"learn":[110],"cooperate":[112],"better.":[113],"We":[114],"report":[115],"main":[117],"findings.":[118],"Firstly,":[119],"find":[121,168],"amount":[124,146],"given":[127],"positive":[130],"impact":[131],"accumulated":[134],"reward":[135],"obtained":[136],"through":[137],"cooperation.":[138],"That":[139],"is,":[140],"trained":[142,151],"limited":[145],"outperform":[149],"without":[152],"any":[153],"feedback,":[154],"performance":[157],"increases":[158],"even":[159],"further":[160],"more":[162,178],"provided.":[165],"Secondly,":[166],"helps":[173],"agents\u2019":[174],"performance,":[175],"although":[176],"modest":[179],"improvements":[180],"compared":[181],"feedback.":[184],"conclusion,":[186],"present":[188],"environment":[192],"better":[194],"understand":[195],"show":[202],"providing":[204],"demonstrations":[208],"accelerate":[210],"process.":[212]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
