{"id":"https://openalex.org/W4415428209","doi":"https://doi.org/10.3233/faia251190","title":"From Surface to Semantics: Semantic Structure Parsing for Table-Centric Document Analysis","display_name":"From Surface to Semantics: Semantic Structure Parsing for Table-Centric Document Analysis","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428209","doi":"https://doi.org/10.3233/faia251190"},"language":null,"primary_location":{"id":"doi:10.3233/faia251190","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251190","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251190","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100362261","display_name":"Xuan Li","orcid":"https://orcid.org/0009-0007-3415-0073"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xuan Li","raw_affiliation_strings":["University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046968946","display_name":"Jialiang Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jialiang Dong","raw_affiliation_strings":["University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015158455","display_name":"Raymond K. Wong","orcid":"https://orcid.org/0000-0002-9814-6029"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Raymond Wong","raw_affiliation_strings":["University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100362261"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":5.2022,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.96098364,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9699000120162964,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9629999995231628,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.8478999733924866},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7106000185012817},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.5078999996185303},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.47380000352859497},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.47200000286102295},{"id":"https://openalex.org/keywords/semantic-role-labeling","display_name":"Semantic role labeling","score":0.45159998536109924},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.44609999656677246},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4413999915122986},{"id":"https://openalex.org/keywords/semantic-interpretation","display_name":"Semantic interpretation","score":0.4302000105381012}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8723000288009644},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.8478999733924866},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7106000185012817},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.642799973487854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5788000226020813},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5575000047683716},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.5078999996185303},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.47380000352859497},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.47200000286102295},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.45159998536109924},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.44609999656677246},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4413999915122986},{"id":"https://openalex.org/C193125573","wikidata":"https://www.wikidata.org/wiki/Q7449065","display_name":"Semantic interpretation","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C60690694","wikidata":"https://www.wikidata.org/wiki/Q894902","display_name":"Bottom-up parsing","level":4,"score":0.31349998712539673},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.3111000061035156},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C202708506","wikidata":"https://www.wikidata.org/wiki/Q7449050","display_name":"Semantic compression","level":5,"score":0.2768999934196472},{"id":"https://openalex.org/C110903229","wikidata":"https://www.wikidata.org/wiki/Q7449064","display_name":"Semantic integration","level":4,"score":0.27140000462532043},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.2651999890804291},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251190","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251190","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251190","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251190","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Documents":[0],"are":[1,29],"core":[2,128],"carriers":[3],"of":[4,56,104,146],"information":[5,27],"and":[6,14,28,49,58,70,93,101,134,143,167,177],"knowledge,":[7],"with":[8,155],"broad":[9],"applications":[10],"in":[11,173],"finance,":[12],"healthcare,":[13],"scientific":[15],"research.":[16],"Tables,":[17],"as":[18,44,186],"the":[19,31],"main":[20],"medium":[21],"for":[22],"structured":[23],"data,":[24],"encapsulate":[25],"key":[26],"among":[30],"most":[32],"critical":[33],"document":[34,82,131,179],"components.":[35],"Existing":[36],"studies":[37],"largely":[38],"focus":[39],"on":[40,121,151],"surface-level":[41],"tasks":[42,65],"such":[43,185],"layout":[45],"analysis,":[46],"table":[47,136],"detection,":[48],"data":[50,68],"extraction,":[51],"lacking":[52],"deep":[53,88,178],"semantic":[54,81,89,123,141,175],"parsing":[55,83,110,133,180],"tables":[57,92,158],"their":[59,94],"contextual":[60],"associations.":[61],"This":[62],"limits":[63],"advanced":[64,183],"like":[66],"cross-paragraph":[67],"interpretation":[69],"context-consistent":[71],"analysis.":[72],"To":[73],"address":[74],"this,":[75],"we":[76],"propose":[77],"DOTABLER,":[78],"a":[79,98,108],"table-centric":[80,130],"framework":[84],"designed":[85],"to":[86,112,118,182],"uncover":[87],"links":[90],"between":[91],"context.":[95],"DOTABLER":[96,125,162],"leverages":[97],"custom":[99],"dataset":[100],"domain-specific":[102,135],"fine-tuning":[103],"pre-trained":[105],"models,":[106],"integrating":[107],"complete":[109],"pipeline":[111],"identify":[113],"context":[114],"segments":[115],"semantically":[116,147],"tied":[117],"tables.":[119,149],"Built":[120],"this":[122],"understanding,":[124],"implements":[126],"two":[127],"functionalities:":[129],"structure":[132],"retrieval,":[137],"delivering":[138],"comprehensive":[139],"table-anchored":[140],"analysis":[142,176],"precise":[144],"extraction":[145],"relevant":[148],"Evaluated":[150],"nearly":[152],"4,000":[153],"pages":[154],"over":[156,164],"1,000":[157],"from":[159],"real-world":[160],"PDFs,":[161],"achieves":[163],"90%":[165],"Precision":[166],"F1":[168],"scores,":[169],"demonstrating":[170],"superior":[171],"performance":[172],"table-context":[174],"compared":[181],"models":[184],"GPT-4o.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
