{"id":"https://openalex.org/W7154247911","doi":"https://doi.org/10.48550/arxiv.2604.10973","title":"CFMS: A Coarse-to-Fine Multimodal Synthesis Framework for Enhanced Tabular Reasoning","display_name":"CFMS: A Coarse-to-Fine Multimodal Synthesis Framework for Enhanced Tabular Reasoning","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154247911","doi":"https://doi.org/10.48550/arxiv.2604.10973"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10973","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10973","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10973","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133553305","display_name":"Qixian Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Qixian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127861342","display_name":"Hongqiang Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Hongqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133567151","display_name":"Tong 
Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Tong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110813535","display_name":"Yingsen Wang","orcid":"https://orcid.org/0000-0003-3038-4255"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yingsen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113584851","display_name":"Zhenghui Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhenghui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133609885","display_name":"Qirui Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qirui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133597949","display_name":"Yiding Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yiding","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133603785","display_name":"Dongxu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, 
Dongxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5133553305"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.850600004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.850600004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.053599998354911804,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.04610000178217888,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial 
Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.609499990940094},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.486299991607666},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4805999994277954},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.383899986743927},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.3617999851703644},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3391999900341034},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.3059999942779541},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.3019999861717224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7985000014305115},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.609499990940094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.539900004863739},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.486299991607666},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual 
reasoning","level":2,"score":0.4805999994277954},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.383899986743927},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36059999465942383},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3255999982357025},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3089999854564667},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C2779639559","wikidata":"https://www.wikidata.org/wiki/Q7661178","display_name":"Symbolic execution","level":3,"score":0.28780001401901245},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge 
representation and reasoning","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.26820001006126404},{"id":"https://openalex.org/C2780929884","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Blindness","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.25589999556541443}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10973","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10973","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10973","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10973","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell 
University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reasoning":[0],"over":[1,124],"tabular":[2],"data":[3],"is":[4],"a":[5,61,89,93,102,112,116],"crucial":[6],"capability":[7],"for":[8],"tasks":[9],"like":[10,31],"question":[11],"answering":[12],"and":[13,25,118,132,150,160],"fact":[14],"verification,":[15],"as":[16,101],"it":[17],"requires":[18],"models":[19],"to":[20,46,87,106],"comprehend":[21],"both":[22],"free-form":[23],"questions":[24],"semi-structured":[26],"tables.":[27],"However,":[28],"while":[29],"methods":[30,39],"Chain-of-Thought":[32],"(CoT)":[33],"introduce":[34],"reasoning":[35,104],"chains,":[36],"purely":[37],"symbolic":[38,73,113],"are":[40],"inherently":[41],"limited":[42],"by":[43],"their":[44],"blindness":[45],"holistic":[47],"visual":[48,69],"patterns.":[49],"To":[50],"address":[51],"this,":[52],"we":[53],"propose":[54],"the":[55,76,81,108,125,130],"Coarse-to-Fine":[56],"Multimodal":[57,82],"Synthesis":[58],"framework":[59,142],"(CFMS),":[60],"novel":[62],"two-stage":[63],"paradigm":[64],"that":[65,136],"hierarchically":[66],"decouples":[67],"high-level":[68],"perception":[70],"from":[71],"granular":[72],"reasoning.":[74],"In":[75],"Coarse":[77],"Stage,":[78],"CFMS":[79,137],"leverages":[80],"Large":[83],"Language":[84],"Models":[85],"(MLLMs)":[86],"perform":[88],"one-time":[90],"synthesis":[91],"of":[92,121],"multi-perspective":[94],"knowledge":[95],"tuple.":[96],"This":[97],"tuple":[98],"subsequently":[99],"serves":[100],"dynamic":[103],"map":[105],"guide":[107],"fine":[109],"stage,":[110],"where"
:[111],"engine":[114],"executes":[115],"targeted":[117],"efficient":[119],"sequence":[120],"iterative":[122],"operations":[123],"table.":[126],"Extensive":[127],"experiments":[128],"on":[129],"WikiTQ":[131],"TabFact":[133],"benchmarks":[134],"demonstrate":[135],"achieves":[138],"competitive":[139],"accuracy.":[140],"The":[141],"exhibits":[143],"particular":[144],"robustness":[145],"when":[146,151],"handling":[147],"large":[148],"tables":[149],"instantiated":[152],"with":[153],"smaller":[154],"backbone":[155],"models,":[156],"validating":[157],"its":[158],"effectiveness":[159],"generalizability.":[161]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-15T00:00:00"}
