{"id":"https://openalex.org/W7137823675","doi":"https://doi.org/10.1609/aaai.v40i1.37042","title":"Multimodal Table Understanding with Difficulty-aware Reinforcement Learning","display_name":"Multimodal Table Understanding with Difficulty-aware Reinforcement Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137823675","doi":"https://doi.org/10.1609/aaai.v40i1.37042"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i1.37042","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.37042","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i1.37042","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061846588","display_name":"Chaohu Liu","orcid":"https://orcid.org/0009-0001-7588-4264"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chaohu Liu","raw_affiliation_strings":["University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129699042","display_name":"Haoyu Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Cao","raw_affiliation_strings":["University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129662301","display_name":"YongXiang Hua","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"YongXiang Hua","raw_affiliation_strings":["University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009732907","display_name":"Linli Xu","orcid":"https://orcid.org/0000-0003-0227-3793"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linli Xu","raw_affiliation_strings":["University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5061846588"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03719599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":"755","last_page":"763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.6762999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.6762999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10789","display_name":"Interactive and Immersive Displays","score":0.01759999990463257,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.016200000420212746,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.8090000152587891},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.608299970626831},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.5285999774932861},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.47110000252723694},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.3560999929904938},{"id":"https://openalex.org/keywords/table-of-contents","display_name":"Table of contents","score":0.3361000120639801},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.32580000162124634},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.31690001487731934}],"concepts":[{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.8090000152587891},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7710000276565552},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.608299970626831},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5605000257492065},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.5285999774932861},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5052000284194946},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.47110000252723694},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C68476402","wikidata":"https://www.wikidata.org/wiki/Q1456936","display_name":"Table of contents","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33379998803138733},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C172967692","wikidata":"https://www.wikidata.org/wiki/Q747762","display_name":"Decision table","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2563999891281128},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2556000053882599}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i1.37042","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.37042","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i1.37042","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i1.37042","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7484975457191467,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0,71],"table":[1,10,79,144,196,234],"understanding,":[2],"which":[3],"aims":[4],"for":[5,59,233],"a":[6,22,36,56,83,104,130,169,183,218],"comprehensive":[7],"grasp":[8],"of":[9,27,35,46,53,69,143],"content":[11],"by":[12,39,188,221],"integrating":[13],"cellular":[14],"text,":[15],"tabular":[16,95],"structure,":[17],"and":[18,49,91,119,140,146,172,190,226],"visual":[19],"presentation,":[20],"remains":[21],"core":[23],"yet":[24],"challenging":[25],"area":[26],"research.":[28],"We":[29],"identify":[30],"that":[31,66,158,199],"the":[32,44,50,67,134,159],"structural":[33],"complexity":[34,80],"table,":[37],"quantifiable":[38],"intrinsic":[40],"properties":[41],"such":[42],"as":[43,78],"ratio":[45],"merged":[47],"cells":[48],"total":[51],"number":[52],"cells,":[54],"presents":[55],"significant":[57],"obstacle":[58],"existing":[60],"models.":[61],"Our":[62],"empirical":[63],"analysis":[64],"reveals":[65],"performance":[68],"leading":[70],"Large":[72],"Language":[73],"Models":[74],"(MLLMs)":[75],"deteriorates":[76],"markedly":[77],"increases,":[81],"exposing":[82],"critical":[84],"vulnerability":[85],"in":[86,181],"their":[87],"ability":[88],"to":[89,128,149,164],"perceive":[90],"reason":[92],"over":[93],"intricate":[94],"data.":[96],"To":[97],"address":[98],"this":[99],"challenge,":[100],"we":[101,115,177],"propose":[102],"MM-Table-R1,":[103],"model":[105,135,160,216],"enhanced":[106],"through":[107],"difficulty-aware":[108],"reinforcement":[109],"learning":[110,174],"(RL)":[111],"post-training":[112],"strategy.":[113],"Specifically,":[114],"introduce":[116],"both":[117,223],"task-level":[118,124],"data-level":[120,155],"curriculum":[121,125,156],"learning.":[122],"The":[123,154],"is":[126,161,202],"designed":[127,232],"establish":[129],"capability":[131],"ladder,":[132],"where":[133],"first":[136],"learns":[137],"basic":[138],"perceptual":[139],"semantic":[141],"alignment":[142],"data,":[145],"then":[147],"progresses":[148],"acquiring":[150],"multi-step":[151],"reasoning":[152],"capabilities.":[153],"ensures":[157],"not":[162],"exposed":[163],"difficult":[165],"samples":[166],"prematurely,":[167],"facilitating":[168],"more":[170],"gradual":[171],"effective":[173],"process.":[175],"Furthermore,":[176],"invest":[178],"considerable":[179],"effort":[180],"constructing":[182],"high-quality,":[184],"large-scale":[185],"training":[186],"corpus":[187],"curating":[189],"processing":[191],"data":[192],"from":[193],"diverse":[194],"open-source":[195],"datasets,":[197],"ensuring":[198],"each":[200],"instance":[201],"paired":[203],"with":[204],"an":[205],"objectively":[206],"verifiable":[207],"reward":[208],"signal.":[209],"Demonstrating":[210],"exceptional":[211],"parameter":[212],"efficiency,":[213],"our":[214],"3B-parameter":[215],"sets":[217],"new":[219],"benchmark":[220],"surpassing":[222],"established":[224],"3B":[225],"7B":[227],"models,":[228],"including":[229],"those":[230],"specifically":[231],"reasoning.":[235]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
