{"id":"https://openalex.org/W7160275193","doi":"https://doi.org/10.1109/wacv61042.2026.00627","title":"UniTabBank: A Large Scale Multi-Lingual, Multi-Layout, Multi-Type, Multi-Format Dataset for Table Detection","display_name":"UniTabBank: A Large Scale Multi-Lingual, Multi-Layout, Multi-Type, Multi-Format Dataset for Table Detection","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7160275193","doi":"https://doi.org/10.1109/wacv61042.2026.00627"},"language":null,"primary_location":{"id":"doi:10.1109/wacv61042.2026.00627","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052301812","display_name":"Ajoy Mondal","orcid":"https://orcid.org/0000-0002-4808-8860"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ajoy Mondal","raw_affiliation_strings":["IIIT,CVIT,Hyderabad,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT,CVIT,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119194036","display_name":"Saumya Mundra","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Saumya Mundra","raw_affiliation_strings":["IIIT,CVIT,Hyderabad,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT,CVIT,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091745324","display_name":"Avijit Dasgupta","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Avijit Dasgupta","raw_affiliation_strings":["IIIT,CVIT,Hyderabad,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT,CVIT,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064321215","display_name":"C V Jawahar","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"C. V. Jawahar","raw_affiliation_strings":["IIIT,CVIT,Hyderabad,India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIIT,CVIT,Hyderabad,India","institution_ids":["https://openalex.org/I65181880"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68640469,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6486","last_page":"6495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.8952000141143799,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.8952000141143799,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5644999742507935},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5220000147819519},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.2782999873161316},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.2750999927520752},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.27390000224113464}],"concepts":[{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5644999742507935},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5220000147819519},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45249998569488525},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.4336000084877014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.350600004196167},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3452000021934509},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2782999873161316},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.24740000069141388}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61042.2026.00627","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00627","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1903029394","https://openalex.org/W1970549718","https://openalex.org/W1988783898","https://openalex.org/W1990899722","https://openalex.org/W2022351003","https://openalex.org/W2031489346","https://openalex.org/W2046941907","https://openalex.org/W2051530142","https://openalex.org/W2104875837","https://openalex.org/W2111768419","https://openalex.org/W2117462434","https://openalex.org/W2139053978","https://openalex.org/W2150673968","https://openalex.org/W2193145675","https://openalex.org/W2444353601","https://openalex.org/W2787523828","https://openalex.org/W2795424778","https://openalex.org/W2914121031","https://openalex.org/W2963037989","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2964241181","https://openalex.org/W3003496674","https://openalex.org/W3003711898","https://openalex.org/W3003760744","https://openalex.org/W3004042913","https://openalex.org/W3004186774","https://openalex.org/W3014641072","https://openalex.org/W3034997246","https://openalex.org/W3080882316","https://openalex.org/W3096609285","https://openalex.org/W3163021734","https://openalex.org/W3172752666","https://openalex.org/W3202465222","https://openalex.org/W3204180818","https://openalex.org/W3213341651","https://openalex.org/W3217518891","https://openalex.org/W4206216093","https://openalex.org/W4388195444","https://openalex.org/W4394683372","https://openalex.org/W4402343098","https://openalex.org/W4409261987","https://openalex.org/W4409262040","https://openalex.org/W4409263299","https://openalex.org/W4410197292","https://openalex.org/W4410636570"],"related_works":[],"abstract_inverted_index":{"Tables":[0],"play":[1],"a":[2,58,102,154],"key":[3,76],"role":[4],"in":[5,30],"conveying":[6],"structured":[7],"data":[8],"across":[9],"documents.":[10],"Accurate":[11],"table":[12,62,144,158,168],"detection":[13,63,169],"is":[14,72],"crucial":[15],"for":[16,42,157],"downstream":[17],"tasks":[18],"like":[19],"structure":[20],"recognition":[21],"and":[22,33,50,95,114,124,126,130,148,186],"information":[23],"extraction.":[24],"However,":[25],"current":[26],"datasets":[27,44],"lack":[28],"diversity":[29],"format,":[31],"language,":[32],"layout,":[34],"limiting":[35],"real-world":[36],"generalization.":[37],"This":[38],"underscores":[39],"the":[40,174],"need":[41],"well-annotated":[43],"that":[45],"are":[46,188],"multi-lingual,":[47],"layout-diverse,":[48],"document-agnostic,":[49],"format-richTo":[51],"address":[52],"these":[53],"limitations,":[54],"we":[55,151],"introduce":[56,152],"UniTabBank,":[57],"large":[59],"scale,":[60],"diverse":[61],"dataset":[64,185],"designed":[65],"to":[66,181],"reflect":[67],"realistic":[68],"use":[69],"cases.":[70],"UniTabBank":[71,179],"characterized":[73],"by":[74],"five":[75],"attributes:":[77],"(i)":[78],"Multi-Lingual":[79],"\u2014":[80,91,100,118,133],"supporting":[81],"28":[82],"languages":[83],"(including":[84],"Arabic,":[85],"English,":[86],"Hindi,":[87],"etc.);":[88],"(ii)":[89],"Multi-Layout":[90],"encompassing":[92],"both":[93],"single-column":[94],"multi-column":[96],"documents;":[97],"(iii)":[98],"Multi-Type":[99],"covering":[101],"wide":[103],"range":[104],"of":[105,135,166,178],"document":[106,137],"genres":[107],"such":[108],"as":[109],"annual":[110],"reports,":[111],"books,":[112],"newspapers,":[113],"magazines;":[115],"(iv)":[116],"Multi-Format":[117],"comprising":[119],"scanned":[120],"documents,":[121],"photographed":[122],"pages,":[123],"PDFs;":[125],"finally":[127],"(v)":[128],"Scale":[129],"Annotation":[131],"Quality":[132],"consists":[134],"55,443":[136],"page":[138],"images":[139],"with":[140],"82,114":[141],"accurately":[142],"annotated":[143],"instances,":[145],"offering":[146],"scale":[147],"annotation":[149],"precisionAdditionally,":[150],"UniTabDet,":[153],"YOLO-based":[155],"model":[156],"detection,":[159],"which":[160],"outperforms":[161],"state-of-the-arts":[162],"on":[163],"eight":[164],"out":[165],"nine":[167],"benchmarks.":[170,183],"Cross-benchmark":[171],"evaluation":[172],"highlights":[173],"strong":[175],"generalization":[176],"capability":[177],"compared":[180],"existing":[182],"The":[184],"models":[187],"available":[189],"here.":[190]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
