{"id":"https://openalex.org/W7131126137","doi":"https://doi.org/10.1109/iccvw69036.2025.00777","title":"ZOD: Zero-Shot and Out-of-Distribution Detection Dataset for Document Images","display_name":"ZOD: Zero-Shot and Out-of-Distribution Detection Dataset for Document Images","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131126137","doi":"https://doi.org/10.1109/iccvw69036.2025.00777"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012921964","display_name":"Talha Uddin Sheikh","orcid":"https://orcid.org/0009-0004-9156-5679"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Talha Uddin Sheikh","raw_affiliation_strings":["DFKI"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067569032","display_name":"S. N. Sinha","orcid":"https://orcid.org/0009-0009-6820-3633"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sankalp Sinha","raw_affiliation_strings":["DFKI"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126632121","display_name":"Shino Sam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shino Sam","raw_affiliation_strings":["DFKI"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126617224","display_name":"Didier Stricker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Didier Stricker","raw_affiliation_strings":["DFKI"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126622929","display_name":"Muhammad Zeshan Afzal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhammad Zeshan Afzal","raw_affiliation_strings":["DFKI"],"affiliations":[{"raw_affiliation_string":"DFKI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5012921964"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75037649,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7545","last_page":"7555"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.5454000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.5454000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.22609999775886536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.03970000147819519,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7627000212669373},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7146000266075134},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5627999901771545},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5160999894142151},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.5145000219345093},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4092000126838684},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.32850000262260437},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.3199999928474426}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7627000212669373},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7400000095367432},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7146000266075134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5659000277519226},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5627999901771545},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5160999894142151},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.5145000219345093},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.505299985408783},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4092000126838684},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40310001373291016},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.32850000262260437},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C72773152","wikidata":"https://www.wikidata.org/wiki/Q5287629","display_name":"Document layout analysis","level":3,"score":0.2702000141143799},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.26089999079704285},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1528802670","https://openalex.org/W2031408949","https://openalex.org/W2098345386","https://openalex.org/W2156332201","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2289084343","https://openalex.org/W2334493732","https://openalex.org/W2400717490","https://openalex.org/W2605976347","https://openalex.org/W2765407302","https://openalex.org/W2769937543","https://openalex.org/W2910453440","https://openalex.org/W2962772269","https://openalex.org/W2963336383","https://openalex.org/W2963499153","https://openalex.org/W2964036463","https://openalex.org/W2992308087","https://openalex.org/W3003981162","https://openalex.org/W3013022628","https://openalex.org/W3014773921","https://openalex.org/W3035251701","https://openalex.org/W3159672047","https://openalex.org/W3162332195","https://openalex.org/W3174427107","https://openalex.org/W3174699664","https://openalex.org/W3176664887","https://openalex.org/W3177096435","https://openalex.org/W3198902415","https://openalex.org/W3212555189","https://openalex.org/W4226020328","https://openalex.org/W4226502635","https://openalex.org/W4246193833","https://openalex.org/W4285787895","https://openalex.org/W4304013646","https://openalex.org/W4304014014","https://openalex.org/W4307940854","https://openalex.org/W4319299836","https://openalex.org/W4385570363","https://openalex.org/W4386065569","https://openalex.org/W4386076493","https://openalex.org/W4390874554","https://openalex.org/W4394625573","https://openalex.org/W4399929809","https://openalex.org/W4402430817","https://openalex.org/W4408182988","https://openalex.org/W4408974494"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,105],"paper,":[2],"we":[3],"introduce":[4],"the":[5,24,53,102,154,174,181],"first":[6],"dataset":[7,77,178,196],"specifically":[8],"designed":[9],"for":[10,47,82,152],"zero-shot":[11,93,114,162],"learning":[12],"(ZSL)":[13],"and":[14,50,94,113,130,145,161,183,193],"out-of-distribution":[15],"(OOD)":[16],"detection":[17,112,144],"in":[18,159,179],"document":[19,27,30,83,157,186],"images,":[20,84],"helping":[21],"to":[22,62,139,172],"advance":[23],"field":[25],"of":[26,91,101,156,176,185],"classification.":[28,116],"Traditional":[29],"classification":[31,187],"systems":[32,188],"often":[33],"struggle":[34],"with":[35,126],"handling":[36],"inputs":[37],"that":[38,78,87,189],"deviate":[39],"from":[40],"their":[41],"training":[42],"distributions,":[43],"a":[44,58,75,149],"critical":[45],"challenge":[46],"achieving":[48],"robustness":[49],"generalizability.":[51],"While":[52],"RVL-CDIP":[54,128],"corpus":[55],"serves":[56],"as":[57],"standard":[59],"benchmark":[60],"due":[61],"its":[63],"extensive":[64],"scale,":[65],"it":[66],"primarily":[67],"supports":[68],"in-distribution":[69],"evaluations.":[70],"Furthermore,":[71],"while":[72],"there":[73],"is":[74,197],"small":[76],"address":[79],"OOD":[80,95,111,143],"tasks":[81],"but":[85],"none":[86],"simultaneously":[88],"incorporate":[89],"elements":[90],"both":[92,110,142,191],"detection.":[96],"Our":[97],"dataset,":[98],"an":[99],"extension":[100],"RVL-CDIP,":[103],"fills":[104],"crucial":[106],"gap":[107],"by":[108],"facilitating":[109],"image":[115],"It":[117],"comprises":[118],"approximately":[119],"38,000":[120],"images":[121],"across":[122],"ten":[123],"classes-five":[124],"overlapping":[125],"traditional":[127],"categories":[129],"five":[131],"completely":[132],"new.":[133],"This":[134],"unique":[135],"setup":[136],"enables":[137],"researchers":[138],"rigorously":[140],"test":[141],"ZSL":[146],"capabilities,":[147],"providing":[148],"robust":[150,192],"framework":[151],"evaluating":[153],"resilience":[155],"classifiers":[158],"supervised":[160],"settings.":[163],"We":[164],"present":[165],"comprehensive":[166],"benchmarking":[167],"results,":[168],"using":[169],"state-of-the-art":[170],"models,":[171],"demonstrate":[173],"utility":[175],"our":[177],"enhancing":[180],"development":[182],"assessment":[184],"are":[190],"generalizable.":[194],"The":[195],"publicly":[198],"available":[199],"at:":[200],"h":[201],"t":[202,203],"ps:":[204],"//github.com/mindgarage-shan/zod.git":[205]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
