{"id":"https://openalex.org/W7150987003","doi":"https://doi.org/10.48550/arxiv.2604.02880","title":"InstructTable: Improving Table Structure Recognition Through Instructions","display_name":"InstructTable: Improving Table Structure Recognition Through Instructions","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150987003","doi":"https://doi.org/10.48550/arxiv.2604.02880"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.02880","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02880","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.02880","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133003072","display_name":"Boming Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Boming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133048187","display_name":"Zining Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zining","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102907192","display_name":"Zhentao Guo","orcid":"https://orcid.org/0009-0000-0242-4059"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Zhentao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Liu, Jianqiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jianqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126725608","display_name":"Chen Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133033907","display_name":"Yu Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133017657","display_name":"Kai Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"zhou, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133030578","display_name":"Pengfei Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Pengfei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5133003072"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9218000173568726,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9218000173568726,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.009999999776482582,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12549","display_name":"Image and Object Detection Techniques","score":0.007499999832361937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7598000168800354},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7170000076293945},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6568999886512756},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5020999908447266},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.359499990940094},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3384000062942505},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.33250001072883606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8155999779701233},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7598000168800354},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7170000076293945},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5026999711990356},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5020999908447266},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4528999924659729},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3682999908924103},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3384000062942505},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32109999656677246},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2890999913215637},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2872999906539917},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.26910001039505005},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26089999079704285}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.02880","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02880","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.02880","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02880","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Table":[0,115],"structure":[1],"recognition":[2,45],"(TSR)":[3],"holds":[4],"widespread":[5],"practical":[6],"importance":[7],"by":[8],"parsing":[9,108],"tabular":[10,127],"images":[11,145],"into":[12],"structured":[13],"representations,":[14],"yet":[15],"encounters":[16],"significant":[17],"challenges":[18],"when":[19],"processing":[20],"complex":[21,47,95,143],"layouts":[22],"involving":[23],"merged":[24],"or":[25],"empty":[26],"cells.":[27],"Traditional":[28],"visual-centric":[29],"models":[30,50],"rely":[31],"exclusively":[32],"on":[33,158],"visual":[34,64,102],"information":[35,103],"while":[36],"lacking":[37],"crucial":[38],"semantic":[39],"support,":[40],"thereby":[41],"impeding":[42],"accurate":[43],"structural":[44,65,90],"in":[46,173],"scenarios.":[48,111],"Vision-language":[49],"leverage":[51],"contextual":[52],"semantics":[53],"to":[54,150],"enhance":[55],"comprehension;":[56],"however,":[57],"these":[58,69],"approaches":[59],"underemphasize":[60],"the":[61,133,180,184],"modeling":[62],"of":[63,94,183],"information.":[66],"To":[67],"address":[68],"limitations,":[70],"this":[71],"paper":[72],"introduces":[73],"InstructTable,":[74],"an":[75,119],"instruction-guided":[76],"multi-stage":[77],"training":[78],"TSR":[79,98,174],"framework.":[80],"Meticulously":[81],"designed":[82],"table":[83,107,144],"instruction":[84],"pre-training":[85],"directs":[86],"attention":[87],"toward":[88],"fine-grained":[89],"patterns,":[91],"enhancing":[92],"comprehension":[93],"tables.":[96],"Complementary":[97],"fine-tuning":[99],"preserves":[100],"robust":[101],"modeling,":[104],"maintaining":[105],"high-precision":[106],"across":[109],"diverse":[110],"Furthermore,":[112],"we":[113,131],"introduce":[114],"Mix":[116],"Expand":[117],"(TME),":[118],"innovative":[120],"template-free":[121],"method":[122,149],"for":[123],"synthesizing":[124],"large-scale":[125],"authentic":[126],"data.":[128,190],"Leveraging":[129],"TME,":[130],"construct":[132],"Balanced":[134],"Complex":[135],"Dense":[136],"Synthetic":[137],"Tables":[138],"(BCDSTab)":[139],"benchmark,":[140],"comprising":[141],"900":[142],"synthesized":[146],"through":[147],"our":[148],"serve":[151],"as":[152],"a":[153],"rigorous":[154],"benchmark.":[155],"Extensive":[156],"experiments":[157],"multiple":[159],"public":[160],"datasets":[161],"(FinTabNet,":[162],"PubTabNet,":[163],"MUSTARD)":[164],"and":[165,188],"BCDSTab":[166],"demonstrate":[167],"that":[168],"InstructTable":[169],"achieves":[170],"state-of-the-art":[171],"performance":[172],"tasks.":[175],"Ablation":[176],"studies":[177],"further":[178],"confirm":[179],"positive":[181],"impact":[182],"proposed":[185],"tabular-data-specific":[186],"instructions":[187],"synthetic":[189]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-04-07T00:00:00"}
