{"id":"https://openalex.org/W6968010342","doi":"https://doi.org/10.5281/zenodo.11612556","title":"A Benchmark Suite for Systematically Evaluating Reasoning Shortcuts","display_name":"A Benchmark Suite for Systematically Evaluating Reasoning Shortcuts","publication_year":2024,"publication_date":"2024-06-12","ids":{"openalex":"https://openalex.org/W6968010342","doi":"https://doi.org/10.5281/zenodo.11612556"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.11612556","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.11612556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.11612556","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Samuele, Bortolotti","orcid":null},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Samuele, Bortolotti","raw_affiliation_strings":["University of Trento"],"affiliations":[{"raw_affiliation_string":"University of Trento","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Emanuele, Marconato","orcid":null},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Emanuele, Marconato","raw_affiliation_strings":["University of Trento"],"affiliations":[{"raw_affiliation_string":"University of Trento","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tommaso, Carraro","orcid":null},"institutions":[{"id":"https://openalex.org/I2277624104","display_name":"Fondazione Bruno Kessler","ror":"https://ror.org/01j33xk10","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2277624104"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Tommaso, Carraro","raw_affiliation_strings":["Fondazione Bruno Kessler"],"affiliations":[{"raw_affiliation_string":"Fondazione Bruno Kessler","institution_ids":["https://openalex.org/I2277624104"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Paolo, Morettin","orcid":null},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Paolo, Morettin","raw_affiliation_strings":["University of Trento"],"affiliations":[{"raw_affiliation_string":"University of Trento","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Emile, van Krieken","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emile, van Krieken","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Antonio, Vergari","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Antonio, Vergari","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Stefano, Teso","orcid":null},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Stefano, Teso","raw_affiliation_strings":["University of Trento"],"affiliations":[{"raw_affiliation_string":"University of Trento","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"last","author":{"id":null,"display_name":"Andrea, Passerini","orcid":"https://orcid.org/0000-0002-2765-5395"},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea, Passerini","raw_affiliation_strings":["University of Trento"],"affiliations":[{"raw_affiliation_string":"University of Trento","institution_ids":["https://openalex.org/I193223587"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I193223587"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.7559000253677368},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.7250999808311462},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6503000259399414},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5041000247001648},{"id":"https://openalex.org/keywords/rss","display_name":"RSS","score":0.45989999175071716},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.436599999666214},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4214000105857849},{"id":"https://openalex.org/keywords/mit-license","display_name":"MIT License","score":0.40230000019073486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8513000011444092},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.7559000253677368},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.7250999808311462},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6503000259399414},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5785999894142151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5209000110626221},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C2385561","wikidata":"https://www.wikidata.org/wiki/Q45432","display_name":"RSS","level":2,"score":0.45989999175071716},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.436599999666214},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4214000105857849},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42100000381469727},{"id":"https://openalex.org/C174183944","wikidata":"https://www.wikidata.org/wiki/Q334661","display_name":"MIT License","level":3,"score":0.40230000019073486},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3824000060558319},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.38109999895095825},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.2978000044822693},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.29089999198913574},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2741999924182892},{"id":"https://openalex.org/C2776969324","wikidata":"https://www.wikidata.org/wiki/Q613918","display_name":"Software quality assurance","level":5,"score":0.26750001311302185},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.11612556","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.11612556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.11612556","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.11612556","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Codebase":[0],"[Github]":[1],"|":[2],"Dataset":[3],"[Zenodo]":[4],"Abstract":[5],"The":[6,375,399,414],"advent":[7],"of":[8,31,98,132,200,276],"powerful":[9],"neural":[10,149],"classifiers":[11],"has":[12],"increased":[13],"interest":[14],"in":[15,134,146,242],"problems":[16,24],"that":[17,49,141,238,328,389],"require":[18],"both":[19,52,147,312],"learning":[20,53,135],"and":[21,38,42,54,122,150,178,187,207,249,278,314,343,348,356,396,446],"reasoning.":[22],"These":[23,267,334],"are":[25,190,283,336,377],"critical":[26],"for":[27,118,128,170,246,274,301,311,346,439],"understanding":[28],"important":[29],"properties":[30],"models,":[32],"such":[33,209],"as":[34,208],"trustworthiness,":[35],"generalization,":[36],"interpretability,":[37],"compliance":[39],"to":[40,77,93,106,174,231,285],"safety":[41],"structural":[43],"constraints.":[44],"However,":[45],"recent":[46],"research":[47],"observed":[48],"tasks":[50,109],"requiring":[51],"reasoning":[55,62,70],"on":[56,100,160,172,339,373,410],"background":[57],"knowledge":[58],"often":[59],"suffer":[60],"from":[61,205,225,258,324,368,379,405,421],"shortcuts":[63],"(RSs):":[64],"predictors":[65],"can":[66,418],"solve":[67],"the":[68,74,78,96,130,166,176,193,198,213,222,232,240,255,259,291,302,325,380,391,411,422],"downstream":[69],"task":[71],"without":[72],"associating":[73],"correct":[75],"concepts":[76,145,313,347],"high-dimensional":[79],"data.":[80],"To":[81],"address":[82],"this":[83,318],"issue,":[84],"we":[85,139],"introduce":[86],"rsbench,":[87,138],"a":[88,154,226,243,331,369,406],"comprehensive":[89],"benchmark":[90],"suite":[91],"designed":[92],"systematically":[94],"evaluate":[95],"impact":[97],"RSs":[99,133],"models":[101,152],"by":[102,111,262,309,394],"providing":[103],"easy":[104],"access":[105,277],"highly":[107],"customizable":[108],"affected":[110],"RSs.":[112],"Furthermore,":[113],"rsbench":[114,157],"implements":[115],"common":[116],"metrics":[117],"evaluating":[119],"concept":[120],"quality":[121,144],"introduces":[123],"novel":[124],"formal":[125],"verification":[126],"procedures":[127],"assessing":[129],"presence":[131],"tasks.":[136],"Using":[137],"highlight":[140],"obtaining":[142],"high":[143],"purely":[148],"neuro-symbolic":[151],"is":[153,158,203,210,307],"far-from-solved":[155],"problem.":[156],"available":[159,273],"Github.":[161],"Usage":[162],"We":[163],"recommend":[164],"visiting":[165],"official":[167],"code":[168],"website":[169],"instructions":[171],"how":[173],"use":[175,286],"dataset":[177,241],"accompaying":[179],"software":[180],"code.":[181],"License":[182],"All":[183],"ready-made":[184],"data":[185,388],"sets":[186],"generated":[188,300,358],"datasets":[189,268,417],"distributed":[191,211],"under":[192,212],"CC-BY-SA":[194],"4.0":[195],"license,":[196],"with":[197],"exception":[199],"Kand-Logic,":[201],"which":[202],"derived":[204,378],"Kandinsky-patterns":[206],"GPL-3.0":[214],"license.":[215],"Datasets":[216],"Overview":[217],"CLIP-embeddings.":[218],"This":[219,252,295,351,362],"folder":[220,296,352,400],"contains":[221,297,364,401],"saved":[223],"activations":[224],"pretrained":[227,370,407],"CLIP":[228],"model":[229,372,409],"applied":[230],"tested":[233],"dataset.":[234,413],"It":[235],"includes":[236,353],"embeddings":[237,366,376,403],"represent":[239],"format":[244],"suitable":[245],"further":[247,279],"analysis":[248],"experimentation.":[250],"BDD_OIA-original-dataset.":[251],"directory":[253,363],"holds":[254],"original":[256,292,415],"files":[257],"X-OIA":[260],"project":[261,327],"Xu":[263,432],"et":[264,433],"al.":[265],"[1].":[266],"have":[269,329],"been":[270],"made":[271],"publicly":[272],"ease":[275],"research.":[280],"If":[281],"you":[282,320,384],"going":[284],"it,":[287],"please":[288],"consider":[289],"citing":[290],"authors.":[293],"kand-logic-3k.":[294],"all":[298,354],"images":[299,323,335,355],"Kand-Logic":[303,326],"project.":[304],"Each":[305],"image":[306],"accompanied":[308],"annotations":[310,345],"labels.":[315,349],"bbox-kand-logic-3k.":[316],"In":[317],"directory,":[319],"will":[321,385],"find":[322,386],"undergone":[330],"preprocessing":[332],"step.":[333],"extracted":[337,367,404],"based":[338],"bounding":[340],"boxes,":[341],"rescaled,":[342],"include":[344],"sdd-oia.":[350],"labels":[357],"using":[359],"rsbench.":[360],"sdd-oia-embeddings.":[361],"512-dimensional":[365],"ResNet18":[371],"ImageNet.":[374],"sdd-oia`dataset.":[381],"BDD-OIA-preprocessed.":[382],"Here":[383],"preprocessed":[387],"follow":[390],"methodology":[392],"outlined":[393],"Sawada":[395,445],"Nakamura":[397],"[2].":[398],"2048-dimensional":[402],"Faster-RCNN":[408],"BDD-100k":[412],"BDD":[416,428],"be":[419],"downloaded":[420],"following":[423],"Google":[424],"Drive":[425],"link:":[426],"[Download":[427],"Dataset].":[429],"References":[430],"[1]":[431],"al.,":[434],"*Explainable":[435],"Object-Induced":[436],"Action":[437],"Decision":[438],"Autonomous":[440],"Vehicles*,":[441],"CVPR":[442],"2020.":[443],"[2]":[444],"Nakamura,":[447],"*Concept":[448],"Bottleneck":[449],"Model":[450],"With":[451],"Additional":[452],"Unsupervised":[453],"Concepts*,":[454],"IEEE":[455],"2022.":[456]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
