{"id":"https://openalex.org/W4387830560","doi":"https://doi.org/10.1007/978-3-031-44067-0_4","title":"Contrastive Visual Explanations for\u00a0Reinforcement Learning via\u00a0Counterfactual Rewards","display_name":"Contrastive Visual Explanations for\u00a0Reinforcement Learning via\u00a0Counterfactual Rewards","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387830560","doi":"https://doi.org/10.1007/978-3-031-44067-0_4"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-031-44067-0_4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-031-44067-0_4","pdf_url":null,"source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://research-information.bris.ac.uk/en/publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390749","display_name":"Xiaowei Liu","orcid":"https://orcid.org/0000-0001-7276-0787"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Xiaowei Liu","raw_affiliation_strings":["School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090059618","display_name":"Kevin McAreavey","orcid":null},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kevin McAreavey","raw_affiliation_strings":["School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002349071","display_name":"Weiru Liu","orcid":"https://orcid.org/0000-0001-8356-1361"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Weiru Liu","raw_affiliation_strings":["School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering Mathematics and Technology, University of Bristol, Bristol, UK","institution_ids":["https://openalex.org/I36234482"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100390749"],"corresponding_institution_ids":["https://openalex.org/I36234482"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38721622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"72","last_page":"87"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.9761042594909668},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.658336877822876},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6454307436943054},{"id":"https://openalex.org/keywords/attribution","display_name":"Attribution","score":0.5630384683609009},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4822986125946045},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4645495116710663},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4308106601238251},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.32646140456199646},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2554999887943268},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.172275573015213},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.1703474223613739}],"concepts":[{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.9761042594909668},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.658336877822876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6454307436943054},{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.5630384683609009},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4822986125946045},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4645495116710663},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4308106601238251},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.32646140456199646},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2554999887943268},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.172275573015213},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1703474223613739},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/978-3-031-44067-0_4","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-031-44067-0_4","pdf_url":null,"source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:research-information.bris.ac.uk:publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:research-information.bris.ac.uk:publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/7b82f7b9-5b63-4259-ae74-cea15748ea40","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6000000238418579,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1884355088","display_name":null,"funder_award_id":"EP/T026820/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8578728416","display_name":null,"funder_award_id":"EP/T026707/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1640802403","https://openalex.org/W1977655452","https://openalex.org/W2334782222","https://openalex.org/W2594227402","https://openalex.org/W2594336441","https://openalex.org/W2766447205","https://openalex.org/W2897798332","https://openalex.org/W2899652826","https://openalex.org/W2901232217","https://openalex.org/W2917767525","https://openalex.org/W2941205169","https://openalex.org/W2952561542","https://openalex.org/W2962702317","https://openalex.org/W2963095307","https://openalex.org/W2966578325","https://openalex.org/W2966831185","https://openalex.org/W2974395694","https://openalex.org/W2989847975","https://openalex.org/W2990984513","https://openalex.org/W2996001543","https://openalex.org/W2998004401","https://openalex.org/W3004798888","https://openalex.org/W3013495647","https://openalex.org/W3037614835","https://openalex.org/W3040802234","https://openalex.org/W3080539309","https://openalex.org/W3082925502","https://openalex.org/W3094459042","https://openalex.org/W3118210634","https://openalex.org/W3124922852","https://openalex.org/W3164011142","https://openalex.org/W3177471157","https://openalex.org/W3185816745","https://openalex.org/W3188848629","https://openalex.org/W3189365070","https://openalex.org/W4220970489","https://openalex.org/W4246262524","https://openalex.org/W4387830586","https://openalex.org/W4390188019","https://openalex.org/W6604578553","https://openalex.org/W6659729074"],"related_works":["https://openalex.org/W3201448254","https://openalex.org/W4286970243","https://openalex.org/W2066431708","https://openalex.org/W3025615835","https://openalex.org/W4384133558","https://openalex.org/W173210993","https://openalex.org/W2390660599","https://openalex.org/W3003410553","https://openalex.org/W3028847759","https://openalex.org/W2073196990"],"abstract_inverted_index":{"Causal":[0],"attribution":[1],"aided":[2],"by":[3,47],"counterfactual":[4],"reasoning":[5],"is":[6,95],"recognised":[7],"as":[8],"a":[9,20],"key":[10],"feature":[11],"of":[12,67,123],"human":[13,114],"explanation.":[14],"In":[15],"this":[16],"paper":[17],"we":[18],"propose":[19],"post-hoc":[21],"contrastive":[22],"explanation":[23],"framework":[24,43],"for":[25,111],"reinforcement":[26],"learning":[27],"(RL)":[28],"based":[29,69,77,104],"on":[30,70,78,86,105],"comparing":[31],"learned":[32,49,102],"policies":[33,103,118],"under":[34],"actual":[35],"environmental":[36],"rewards":[37],"vs.":[38],"hypothetical":[39],"(counterfactual)":[40],"rewards.":[41],"The":[42],"provides":[44],"policy-level":[45],"explanations":[46,57,75],"accessing":[48],"Q-functions":[50],"and":[51,119],"identifying":[52],"intersecting":[53],"critical":[54],"states.":[55,82],"Global":[56],"are":[58,76],"generated":[59],"to":[60,97],"summarise":[61],"policy":[62],"behaviour":[63],"through":[64],"the":[65,79,99,109,121],"visualisation":[66],"sub-trajectories":[68],"these":[71],"states,":[72],"while":[73],"local":[74],"action-values":[80],"in":[81,128],"We":[83],"conduct":[84],"experiments":[85],"several":[87],"grid-world":[88],"examples.":[89],"Our":[90],"results":[91],"show":[92],"that":[93],"it":[94],"possible":[96],"explain":[98],"difference":[100],"between":[101],"Q-functions.":[106],"This":[107],"demonstrates":[108],"potential":[110],"more":[112],"informed":[113],"decision-making":[115],"when":[116],"deploying":[117],"highlights":[120],"possibility":[122],"developing":[124],"further":[125],"XAI":[126],"techniques":[127],"RL.":[129]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
