{"id":"https://openalex.org/W2088980200","doi":"https://doi.org/10.1045/november14-klampfl","title":"A Comparison of Two Unsupervised Table Recognition Methods from Digital Scientific Articles","display_name":"A Comparison of Two Unsupervised Table Recognition Methods from Digital Scientific Articles","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W2088980200","doi":"https://doi.org/10.1045/november14-klampfl","mag":"2088980200"},"language":"en","primary_location":{"id":"doi:10.1045/november14-klampfl","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-klampfl","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1045/november14-klampfl","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027426964","display_name":"Stefan Klampfl","orcid":null},"institutions":[{"id":"https://openalex.org/I4210088621","display_name":"Know Center Research GmbH (Austria)","ror":"https://ror.org/004zhad81","country_code":"AT","type":"company","lineage":["https://openalex.org/I4210088621"]},{"id":"https://openalex.org/I4210113888","display_name":"Iowa Department of Cultural Affairs","ror":"https://ror.org/025ycfy09","country_code":"US","type":"government","lineage":["https://openalex.org/I4210113888"]}],"countries":["AT","US"],"is_corresponding":true,"raw_author_name":"Stefan Klampfl","raw_affiliation_strings":["4.1.1 ICDAR","Know-Center GmbH Research Center for Data-Driven Business & Big Data Analytics (98770)"],"affiliations":[{"raw_affiliation_string":"4.1.1 ICDAR","institution_ids":["https://openalex.org/I4210113888"]},{"raw_affiliation_string":"Know-Center GmbH Research Center for Data-Driven Business & Big Data Analytics (98770)","institution_ids":["https://openalex.org/I4210088621"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009492024","display_name":"Kris Jack","orcid":"https://orcid.org/0000-0002-6740-4136"},"institutions":[{"id":"https://openalex.org/I4210106863","display_name":"Mendeley (United Kingdom)","ror":"https://ror.org/01t2a8a42","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210106863"]},{"id":"https://openalex.org/I4210113888","display_name":"Iowa Department of Cultural Affairs","ror":"https://ror.org/025ycfy09","country_code":"US","type":"government","lineage":["https://openalex.org/I4210113888"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Kris Jack","raw_affiliation_strings":["4.1.1 ICDAR","Mendeley"],"affiliations":[{"raw_affiliation_string":"4.1.1 ICDAR","institution_ids":["https://openalex.org/I4210113888"]},{"raw_affiliation_string":"Mendeley","institution_ids":["https://openalex.org/I4210106863"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014398832","display_name":"Roman Kern","orcid":"https://orcid.org/0000-0003-0202-6100"},"institutions":[{"id":"https://openalex.org/I4210088621","display_name":"Know Center Research GmbH (Austria)","ror":"https://ror.org/004zhad81","country_code":"AT","type":"company","lineage":["https://openalex.org/I4210088621"]},{"id":"https://openalex.org/I4210113888","display_name":"Iowa Department of Cultural Affairs","ror":"https://ror.org/025ycfy09","country_code":"US","type":"government","lineage":["https://openalex.org/I4210113888"]}],"countries":["AT","US"],"is_corresponding":false,"raw_author_name":"Roman Kern","raw_affiliation_strings":["4.1.1 ICDAR","Know-Center GmbH Research Center for Data-Driven Business & Big Data Analytics (98770)"],"affiliations":[{"raw_affiliation_string":"4.1.1 ICDAR","institution_ids":["https://openalex.org/I4210113888"]},{"raw_affiliation_string":"Know-Center GmbH Research Center for Data-Driven Business & Big Data Analytics (98770)","institution_ids":["https://openalex.org/I4210088621"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5027426964"],"corresponding_institution_ids":["https://openalex.org/I4210088621","https://openalex.org/I4210113888"],"apc_list":null,"apc_paid":null,"fwci":1.2194,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.83745799,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"20","issue":"11/12","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7907075881958008},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7747777700424194},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7161092758178711},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5533433556556702},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5363060235977173},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48355361819267273},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.4771158993244171},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4525246024131775},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4480661153793335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.435813844203949},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4175894260406494},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36560487747192383},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12088635563850403}],"concepts":[{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7907075881958008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7747777700424194},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7161092758178711},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5533433556556702},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5363060235977173},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48355361819267273},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.4771158993244171},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4525246024131775},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4480661153793335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.435813844203949},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4175894260406494},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36560487747192383},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12088635563850403},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1045/november14-klampfl","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-klampfl","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1045/november14-klampfl","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-klampfl","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W1690653314","https://openalex.org/W2401723157","https://openalex.org/W2784269775","https://openalex.org/W2952904874","https://openalex.org/W324626582"],"abstract_inverted_index":{"In":[0,42,97],"digital":[1,30],"scientific":[2],"articles":[3],"tables":[4,68,89],"are":[5],"a":[6,13,70,91,108],"common":[7],"form":[8],"of":[9,20,26,67,87,93,111,117,128],"presenting":[10],"information":[11,28,37],"in":[12,29,120,151],"structured":[14],"way.":[15],"However,":[16],"the":[17,24,62,65,78,84,98,115,129,146,160],"large":[18],"variability":[19],"table":[21,48,79,112,122,147],"layouts":[22],"and":[23,39,56,64,132,157],"lack":[25],"structural":[27],"document":[31],"formats":[32],"pose":[33],"significant":[34],"challenges":[35],"for":[36],"retrieval":[38],"related":[40],"tasks.":[41],"this":[43],"paper":[44],"we":[45],"present":[46],"two":[47,101,137],"recognition":[49,148],"methods":[50,106],"based":[51],"on":[52,136],"unsupervised":[53],"learning":[54],"techniques":[55],"heuristics":[57],"which":[58],"automatically":[59],"detect":[60],"both":[61,76],"location":[63],"structure":[66,104],"within":[69],"article":[71],"stored":[72],"as":[73],"PDF.":[74],"For":[75],"algorithms":[77,130],"region":[80],"detection":[81,105],"first":[82],"identifies":[83],"bounding":[85],"boxes":[86],"individual":[88],"from":[90,114,140],"set":[92,116],"labelled":[94],"text":[95],"blocks.":[96],"second":[99],"step,":[100],"different":[102,141],"tabular":[103],"extract":[107],"rectangular":[109],"grid":[110],"cells":[113],"words":[118],"contained":[119],"these":[121],"regions.":[123],"We":[124,143],"evaluate":[125],"each":[126],"stage":[127],"separately":[131],"compare":[133],"performance":[134,149],"values":[135],"data":[138],"sets":[139],"domains.":[142],"find":[144],"that":[145],"is":[150],"line":[152],"with":[153],"state-of-the-art":[154],"commercial":[155],"systems":[156],"generalises":[158],"to":[159],"non-scientific":[161],"domain.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":3}],"updated_date":"2026-02-25T21:11:00.739837","created_date":"2025-10-10T00:00:00"}
