{"id":"https://openalex.org/W4413756825","doi":"https://doi.org/10.1145/3704268.3748683","title":"Spurious Cues in RVL-CDIP and Tobacco3482 Document Classification: The Case of ID Codes","display_name":"Spurious Cues in RVL-CDIP and Tobacco3482 Document Classification: The Case of ID Codes","publication_year":2025,"publication_date":"2025-08-27","ids":{"openalex":"https://openalex.org/W4413756825","doi":"https://doi.org/10.1145/3704268.3748683"},"language":"en","primary_location":{"id":"doi:10.1145/3704268.3748683","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3748683","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3748683","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3748683","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023740689","display_name":"Stefan Larson","orcid":null},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Stefan Larson","raw_affiliation_strings":["Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119451133","display_name":"Sharad Duwal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164273","display_name":"Fatmawati Hospital","ror":"https://ror.org/05hdpwb59","country_code":"ID","type":"healthcare","lineage":["https://openalex.org/I4210164273"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Sharad Duwal","raw_affiliation_strings":["Fatima Fellowship, Nepal"],"affiliations":[{"raw_affiliation_string":"Fatima Fellowship, Nepal","institution_ids":["https://openalex.org/I4210164273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119451134","display_name":"Brian Vilnrotter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143272","display_name":"Camber Collective (United States)","ror":"https://ror.org/03bh5kx55","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143272"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Vilnrotter","raw_affiliation_strings":["ML Collective, USA"],"affiliations":[{"raw_affiliation_string":"ML Collective, USA","institution_ids":["https://openalex.org/I4210143272"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119451135","display_name":"Gayatri Chakkithara","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Gayatri Chakkithara","raw_affiliation_strings":["University of Toronto, Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, ON, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119451136","display_name":"Vedant Padwal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143272","display_name":"Camber Collective (United States)","ror":"https://ror.org/03bh5kx55","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143272"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vedant Padwal","raw_affiliation_strings":["ML Collective, India"],"affiliations":[{"raw_affiliation_string":"ML Collective, India","institution_ids":["https://openalex.org/I4210143272"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030030910","display_name":"Kevin Leach","orcid":"https://orcid.org/0000-0002-4001-3442"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Leach","raw_affiliation_strings":["Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5023740689"],"corresponding_institution_ids":["https://openalex.org/I200719446"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38064498,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.9345999956130981,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.9345999956130981,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.8499382734298706},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6022093892097473},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3829440772533417},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3419242799282074},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3284686803817749},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16143575310707092}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.8499382734298706},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6022093892097473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3829440772533417},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3419242799282074},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3284686803817749},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16143575310707092}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3704268.3748683","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3748683","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3748683","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3704268.3748683","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3748683","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3748683","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Good health and well-being","id":"https://metadata.un.org/sdg/3","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413756825.pdf","grobid_xml":"https://content.openalex.org/works/W4413756825.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W2004665841","https://openalex.org/W2952984539","https://openalex.org/W3198902415","https://openalex.org/W4304014014","https://openalex.org/W4312233877","https://openalex.org/W4386566652","https://openalex.org/W4400488009","https://openalex.org/W4404781845"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3113091479","https://openalex.org/W2162899405","https://openalex.org/W941090075","https://openalex.org/W2044987316","https://openalex.org/W2033914206","https://openalex.org/W2042327336","https://openalex.org/W3204019825"],"abstract_inverted_index":{"RVL-CDIP":[0,67,85,115,139],"and":[1,51,68,70,86,140,142],"Tobacco3482":[2,69,89,141],"are":[3,119],"commonly":[4],"used":[5,28],"document":[6,104],"classification":[7,38,78],"benchmarks,":[8],"but":[9],"recent":[10],"work":[11],"on":[12,20,36,57,84,88,114],"explainability":[13],"has":[14],"revealed":[15],"that":[16,72,101],"ID":[17,55,62,96,117,128,136],"codes":[18,56,63,118,137],"stamped":[19],"the":[21,37,49,95,122],"documents":[22,65],"in":[23,64,132],"these":[24,54],"datasets":[25],"may":[26],"be":[27],"by":[29],"machine":[30],"learning":[31,74],"models":[32,75],"to":[33,134],"learn":[34],"shortcuts":[35],"task.":[39],"In":[40],"this":[41,144],"paper,":[42],"we":[43,125],"present":[44],"an":[45,127],"in-depth":[46],"investigation":[47],"into":[48],"influence":[50],"impact":[52],"of":[53,81,110],"model":[58,131],"performance.":[59],"We":[60,98],"annotate":[61],"from":[66,94,121,138],"find":[71,100],"shallow":[73],"can":[76],"achieve":[77],"accuracy":[79,112],"scores":[80],"roughly":[82],"40%":[83],"60%":[87],"using":[90],"only":[91],"features":[92],"derived":[93],"codes.":[97],"also":[99],"a":[102,107],"state-of-the-art":[103],"classifier":[105],"sees":[106],"performance":[108],"drop":[109],"11":[111],"points":[113],"when":[116],"removed":[120],"data.":[123],"Finally,":[124],"train":[126],"code":[129],"detection":[130],"order":[133],"remove":[135],"make":[143],"data":[145],"publicly":[146],"available.":[147]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
