{"id":"https://openalex.org/W7137854063","doi":"https://doi.org/10.1609/aaai.v40i29.39676","title":"Revisiting the Data Sampling in Multimodal Post-training from a Difficulty-Distinguish View","display_name":"Revisiting the Data Sampling in Multimodal Post-training from a Difficulty-Distinguish View","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137854063","doi":"https://doi.org/10.1609/aaai.v40i29.39676"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i29.39676","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39676","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i29.39676","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052397173","display_name":"Jianyu Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]},{"id":"https://openalex.org/I75746372","display_name":"ZTE (United States)","ror":"https://ror.org/0518yg160","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098582","https://openalex.org/I75746372"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Jianyu Qi","raw_affiliation_strings":["School of Computer Science and Engineering, Central South University\nIntelligent System Department, Zhongxing Telecom Equipment(ZTE)"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Central South University\nIntelligent System Department, Zhongxing Telecom Equipment(ZTE)","institution_ids":["https://openalex.org/I75746372","https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129735384","display_name":"Ding Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]},{"id":"https://openalex.org/I75746372","display_name":"ZTE (United States)","ror":"https://ror.org/0518yg160","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098582","https://openalex.org/I75746372"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Ding Zou","raw_affiliation_strings":["Intelligent System Department, Zhongxing Telecom Equipment(ZTE)"],"affiliations":[{"raw_affiliation_string":"Intelligent System Department, Zhongxing Telecom Equipment(ZTE)","institution_ids":["https://openalex.org/I75746372","https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025362574","display_name":"Wenrui Yan","orcid":"https://orcid.org/0000-0003-4706-9729"},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]},{"id":"https://openalex.org/I75746372","display_name":"ZTE (United States)","ror":"https://ror.org/0518yg160","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098582","https://openalex.org/I75746372"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Wenrui Yan","raw_affiliation_strings":["Intelligent System Department, Zhongxing Telecom Equipment(ZTE)"],"affiliations":[{"raw_affiliation_string":"Intelligent System Department, Zhongxing Telecom Equipment(ZTE)","institution_ids":["https://openalex.org/I75746372","https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129650410","display_name":"Rui Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]},{"id":"https://openalex.org/I75746372","display_name":"ZTE (United States)","ror":"https://ror.org/0518yg160","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098582","https://openalex.org/I75746372"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Rui Ma","raw_affiliation_strings":["Intelligent System Department, Zhongxing Telecom Equipment(ZTE)"],"affiliations":[{"raw_affiliation_string":"Intelligent System Department, Zhongxing Telecom Equipment(ZTE)","institution_ids":["https://openalex.org/I75746372","https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687951","display_name":"Jiaxu Li","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxu Li","raw_affiliation_strings":["School of Computer Science and Engineering, Central South University"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Central South University","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125988390","display_name":"Zhijie Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijie Zheng","raw_affiliation_strings":["School of Traffic & Transportation Engineering, Central South University"],"affiliations":[{"raw_affiliation_string":"School of Traffic & Transportation Engineering, Central South University","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129646584","display_name":"Zhiguo Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]},{"id":"https://openalex.org/I75746372","display_name":"ZTE (United States)","ror":"https://ror.org/0518yg160","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098582","https://openalex.org/I75746372"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zhiguo Yang","raw_affiliation_strings":["Intelligent System Department, Zhongxing Telecom Equipment(ZTE)"],"affiliations":[{"raw_affiliation_string":"Intelligent System Department, Zhongxing Telecom Equipment(ZTE)","institution_ids":["https://openalex.org/I75746372","https://openalex.org/I4210098582"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086950413","display_name":"Rongchang Zhao","orcid":"https://orcid.org/0000-0002-8453-9446"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongchang Zhao","raw_affiliation_strings":["School of Computer Science and Engineering, Central South University"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Central South University","institution_ids":["https://openalex.org/I139660479"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5052397173"],"corresponding_institution_ids":["https://openalex.org/I4210098582","https://openalex.org/I75746372"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0858209,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"29","first_page":"24891","last_page":"24899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8881000280380249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8881000280380249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.021400000900030136,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.020999999716877937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5857999920845032},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.576200008392334},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5250999927520752},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.48420000076293945},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.47999998927116394},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4009000062942505},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3749000132083893},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.32600000500679016}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7382000088691711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6406999826431274},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6305000185966492},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5857999920845032},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.576200008392334},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5250999927520752},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.48420000076293945},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.47999998927116394},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3749000132083893},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.32600000500679016},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2867000102996826},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26010000705718994},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i29.39676","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39676","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i29.39676","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i29.39676","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,12],"Multimodal":[3],"Large":[4],"Language":[5],"Models":[6],"(MLLMs)":[7],"have":[8],"spurred":[9],"significant":[10],"progress":[11],"Chain-of-Thought":[13],"(CoT)":[14],"reasoning.":[15],"Building":[16],"on":[17,30,36],"the":[18,160],"success":[19],"of":[20,52,57,142],"Deepseek-R1,":[21],"researchers":[22],"extended":[23],"multimodal":[24],"reasoning":[25,75],"to":[26,44,70,145,149],"post-training":[27,41,62,66],"paradigms":[28,42,67],"based":[29],"reinforcement":[31],"learning":[32],"(RL),":[33],"focusing":[34],"predominantly":[35],"mathematical":[37],"datasets.":[38,137],"However,":[39],"existing":[40],"tend":[43],"neglect":[45],"two":[46,83],"critical":[47],"aspects:":[48],"(1)":[49],"The":[50],"lack":[51],"quantifiable":[53],"difficulty":[54],"metrics":[55],"capable":[56],"strategically":[58],"screening":[59],"samples":[60,147],"for":[61,162],"optimization.":[63],"(2)":[64],"Suboptimal":[65],"that":[68,122,154],"fail":[69],"jointly":[71],"optimize":[72],"perception":[73],"and":[74,126,131],"capabilities.":[76],"To":[77],"address":[78],"this":[79],"gap,":[80],"we":[81,116],"propose":[82],"novel":[84],"difficulty-aware":[85],"sampling":[86,157],"strategies:":[87],"Progressive":[88],"Image":[89],"Semantic":[90],"Masking":[91],"(PISM)":[92],"quantifies":[93],"sample":[94],"hardness":[95],"through":[96],"systematic":[97],"image":[98],"degradation,":[99],"while":[100,165],"Cross-Modality":[101],"Attention":[102],"Balance":[103],"(CMAB)":[104],"assesses":[105],"cross-modal":[106],"interaction":[107],"complexity":[108],"via":[109],"attention":[110],"distribution":[111],"analysis.":[112],"Leveraging":[113],"these":[114],"metrics,":[115],"design":[117],"a":[118],"hierarchical":[119],"training":[120,129],"framework":[121],"incorporates":[123],"both":[124],"GRPO-only":[125],"SFT+GRPO":[127,151],"hybrid":[128],"paradigms,":[130],"evaluate":[132],"them":[133],"across":[134],"six":[135],"benchmark":[136],"Experiments":[138],"demonstrate":[139],"consistent":[140],"superiority":[141],"GRPO":[143],"applied":[144],"difficulty-stratified":[146],"compared":[148],"conventional":[150],"pipelines,":[152],"indicating":[153],"strategic":[155],"data":[156],"can":[158],"obviate":[159],"need":[161],"supervised":[163],"fine-tuning":[164],"improving":[166],"model":[167],"accuracy.":[168]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
