{"id":"https://openalex.org/W7092284095","doi":"https://doi.org/10.5281/zenodo.17366964","title":"ClapperText: A Benchmark for Text Recognition in Low-Resource Archival Documents","display_name":"ClapperText: A Benchmark for Text Recognition in Low-Resource Archival Documents","publication_year":2025,"publication_date":"2025-10-16","ids":{"openalex":"https://openalex.org/W7092284095","doi":"https://doi.org/10.5281/zenodo.17366964"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17366964","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17366964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17366964","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Lin, Tingyu","orcid":"https://orcid.org/0009-0008-9825-686X"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Lin, Tingyu","raw_affiliation_strings":["Computer Vision Lab, TU Wien"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, TU Wien","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peer, Marco","orcid":"https://orcid.org/0000-0001-6843-0830"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Peer, Marco","raw_affiliation_strings":["Computer Vision Lab, TU Wien"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, TU Wien","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kleber, Florian","orcid":"https://orcid.org/0000-0001-8351-5066"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Kleber, Florian","raw_affiliation_strings":["Computer Vision Lab, TU Wien"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, TU Wien","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sablatnig, Robert","orcid":"https://orcid.org/0000-0003-4195-1593"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Sablatnig, Robert","raw_affiliation_strings":["Computer Vision Lab, TU Wien"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, TU Wien","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.0035000001080334187,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6118999719619751},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5940999984741211},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5210999846458435},{"id":"https://openalex.org/keywords/disjoint-sets","display_name":"Disjoint sets","score":0.48080000281333923},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.44290000200271606},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.4041999876499176},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37770000100135803}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8083000183105469},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6463000178337097},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6118999719619751},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5940999984741211},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5210999846458435},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49230000376701355},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.48080000281333923},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4542999863624573},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.44290000200271606},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.4041999876499176},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.34369999170303345},{"id":"https://openalex.org/C14740026","wikidata":"https://www.wikidata.org/wiki/Q1136665","display_name":"Vignetting","level":3,"score":0.34220001101493835},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.33719998598098755},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.30149999260902405},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2838999927043915},{"id":"https://openalex.org/C150856459","wikidata":"https://www.wikidata.org/wiki/Q8034367","display_name":"Word recognition","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2635999917984009}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17366964","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17366964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.17366964","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17366964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"ClapperText":[0,3,79],"Dataset":[1],"Overview":[2],"is":[4,21,80,255],"a":[5],"benchmark":[6,248],"dataset":[7,46],"for":[8,104,159,242],"text":[9,53,105],"detection":[10,139],"and":[11,16,41,58,65,88,93,188,240,277,296],"recognition":[12,87],"in":[13,91,119,151,172,251],"visually":[14,94],"degraded":[15],"low-resource":[17,261],"archival":[18,96],"conditions.":[19],"It":[20],"derived":[22,136],"from":[23,137],"127":[24,230],"World":[25],"War":[26],"II\u2013era":[27],"film":[28],"segments":[29],"containing":[30],"clapperboards":[31],"that":[32,182],"record":[33],"structured":[34],"production":[35],"metadata":[36],"such":[37],"as":[38],"date,":[39],"location,":[40],"camera":[42],"operator":[43],"identity.":[44],"The":[45],"contains:\u2022":[47],"9,813":[48,231],"annotated":[49],"frames\u2022":[50],"94,573":[51,232],"word-level":[52],"instances\u2022":[54,62],"67.4%":[55],"handwritten":[56],"words":[57],"1,566":[59],"partially":[60],"occluded":[61],"Both":[63],"full-frame":[64],"cropped":[66,160],"word":[67,134,161],"images\u2022":[68],"Precise":[69],"4-point":[70],"polygon":[71],"annotations":[72,125,158],"to":[73,82,244,259],"support":[74],"spatially":[75],"accurate":[76],"OCR":[77],"applications":[78],"designed":[81],"advance":[83],"robust":[84],"optical":[85],"character":[86],"document":[89],"understanding":[90],"historical":[92,262],"challenging":[95],"materials.":[97],"Folder":[98],"Meaning":[99],"detection/imgs":[100],"\u2013":[101,123,132,155,287],"Full-frame":[102],"images":[103,135,162],"detection\u2022":[106],"Subfolders":[107],"train,":[108,141],"val,":[109],"test:":[110],"disjoint":[111,143],"video":[112,144],"splits\u2022":[113,145],"test_keyframes:":[114,146],"manually":[115,147],"verified":[116,148],"frames":[117,149],"used":[118,150],"reported":[120,152,250],"benchmarks":[121,153],"detection/annos":[122],"JSON":[124],"following":[126],"the":[127,138,173,252,282],"same":[128],"split":[129],"structure":[130],"recognition/imgs":[131],"Cropped":[133],"data\u2022":[140],"val:":[142],"recognition/annos":[154],"Corresponding":[156],"CSV":[157],"Each":[163],"subfolder":[164],"(e.g.,":[165],"8332_2_46_100_T)":[166],"represents:\u2022":[167],"8332":[168],"\u2192":[169,178,186,193],"Segment":[170],"index":[171],"HISTORIAN":[174],"source":[175],"dataset\u2022":[176],"2":[177],"Shot":[179,194],"number":[180],"within":[181],"segment\u2022":[183],"46,":[184],"100":[185],"Start":[187],"end":[189],"frame":[190],"indices\u2022":[191],"T":[192,197],"type":[195],"(here,":[196],"=":[198,270],"text)":[199],"Data":[200],"Summary":[201],"Split":[202],"Videos":[203],"Frames":[204],"Word":[205,273],"Annotations":[206],"Handwritten":[207],"(%)":[208,210],"Occluded":[209],"Train":[211],"18":[212],"1,122":[213],"17,749":[214],"72.3":[215],"4.8":[216],"Val":[217],"8":[218],"527":[219],"4,983":[220],"67.6":[221],"1.6":[222],"Test":[223],"101":[224],"8,164":[225],"71,841":[226],"66.2":[227],"0.9":[228],"Total":[229],"67.4":[233],"1.7":[234],"Recommended":[235],"Usage":[236],"\u2022":[237],"Use":[238],"detection/test_keyframes":[239],"recognition/test_keyframes":[241],"validation":[243],"ensure":[245],"comparability":[246],"with":[247],"results":[249],"paper.\u2022":[253],"Training":[254],"limited":[256],"(18":[257],"videos)":[258],"reflect":[260],"scenarios.\u2022":[263],"Evaluation":[264],"metrics:\u2013":[265],"Detection:":[266],"Hmean":[267],"@":[268],"IoU":[269],"0.5\u2013":[271],"Recognition:":[272],"Recognition":[274],"Accuracy":[275],"(case-":[276],"symbol-normalized)":[278],"Acknowledgements":[279],"Supported":[280],"by":[281],"Austrian":[283],"Science":[284],"Fund":[285],"(FWF)":[286],"doc.funds.connectProject":[288],"No.":[289],"DFH":[290],"37-N:":[291],"Visual":[292,294],"Heritage:":[293],"Analytics":[295],"Computer":[297],"Vision":[298],"Meet":[299],"Cultural":[300],"Heritage.":[301],"More":[302],"Information":[303],"Further":[304],"details":[305],"are":[306],"available":[307],"at:https://github.com/linty5/ClapperText":[308],"For":[309],"questions,":[310],"please":[311],"contact:":[312],"tylin@cvl.tuwien.ac.at":[313]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-18T00:00:00"}
