{"id":"https://openalex.org/W7133350380","doi":"https://doi.org/10.48550/arxiv.2603.00889","title":"CHIMERA: Compact Synthetic Data for Generalizable LLM Reasoning","display_name":"CHIMERA: Compact Synthetic Data for Generalizable LLM Reasoning","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7133350380","doi":"https://doi.org/10.48550/arxiv.2603.00889"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00889","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00889","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00889","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127904293","display_name":"Xinyu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhu, Xinyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111345651","display_name":"Yihao Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Yihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101353767","display_name":"Yanchao Sun","orcid":"https://orcid.org/0000-0002-1137-9939"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yanchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127818264","display_name":"Xianzhi Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xianzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128000508","display_name":"Pingzhi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Pingzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001454502","display_name":"Olli Saarikivi","orcid":"https://orcid.org/0000-0001-7596-4734"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saarikivi, Olli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127996544","display_name":"Yun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128012404","display_name":"Yu Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5127904293"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4203999936580658,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4203999936580658,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09160000085830688,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5078999996185303},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.5029000043869019},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.49559998512268066},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.483599990606308},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.46700000762939453},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.45570001006126404},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4544000029563904},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.4415000081062317},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.42750000953674316},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.41819998621940613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7251999974250793},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6312000155448914},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5078999996185303},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.5029000043869019},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.49559998512268066},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.483599990606308},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.46700000762939453},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.45570001006126404},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4544000029563904},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.4415000081062317},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4196999967098236},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.39500001072883606},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3781999945640564},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.3416999876499176},{"id":"https://openalex.org/C2992562121","wikidata":"https://www.wikidata.org/wiki/Q3817808","display_name":"Scientific reasoning","level":2,"score":0.3346000015735626},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.33309999108314514},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C182620335","wikidata":"https://www.wikidata.org/wiki/Q2852531","display_name":"Answer set programming","level":3,"score":0.2605000138282776},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00889","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00889","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00889","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00889","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.535010814666748}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"recently":[5],"exhibited":[6],"remarkable":[7],"reasoning":[8,23,63,73,96,116,138,143,181,216,232],"capabilities,":[9],"largely":[10],"enabled":[11],"by":[12,38,141],"supervised":[13],"fine-tuning":[14],"(SFT)-":[15],"and":[16,27,33,86,149,157,168,188,224,241],"reinforcement":[17],"learning":[18],"(RL)-based":[19],"post-training":[20],"on":[21,211],"high-quality":[22],"data.":[24],"However,":[25],"reproducing":[26],"extending":[28],"these":[29,108],"capabilities":[30],"in":[31,77],"open":[32],"scalable":[34,175],"settings":[35],"is":[36,126],"hindered":[37],"three":[39,129],"fundamental":[40],"data-centric":[41],"challenges:":[42],"(1)":[43,132],"the":[44,49,88,92,201,205,231],"cold-start":[45],"problem,":[46],"arising":[47],"from":[48],"lack":[50],"of":[51,82,94,214,234],"seed":[52],"datasets":[53,74],"with":[54,79,128],"detailed,":[55],"long":[56,136],"Chain-of-Thought":[57],"(CoT)":[58],"trajectories":[59,139],"needed":[60],"to":[61,183,194],"initialize":[62],"policies;":[64],"(2)":[65,145],"limited":[66,80],"domain":[67],"coverage,":[68,151],"as":[69,239],"most":[70],"existing":[71],"open-source":[72],"are":[75],"concentrated":[76],"mathematics,":[78],"coverage":[81],"broader":[83],"scientific":[84,155],"disciplines;":[85],"(3)":[87,169],"annotation":[89,101],"bottleneck,":[90],"where":[91],"difficulty":[93],"frontier-level":[95],"tasks":[97],"makes":[98],"reliable":[99],"human":[100],"prohibitively":[102],"expensive":[103],"or":[104,229],"infeasible.":[105],"To":[106],"address":[107],"challenges,":[109],"we":[110],"introduce":[111],"CHIMERA,":[112],"a":[113,164,172,196,212],"compact":[114],"synthetic":[115],"dataset":[117],"comprising":[118],"9K":[119],"samples":[120],"for":[121],"generalizable":[122],"cross-domain":[123],"reasoning.":[124],"CHIMERA":[125,193],"constructed":[127],"key":[130],"properties:":[131],"it":[133,146,170],"provides":[134],"rich,":[135],"CoT":[137],"synthesized":[140],"state-of-the-art":[142],"models;":[144],"has":[147],"broad":[148],"structured":[150],"spanning":[152],"8":[153],"major":[154],"disciplines":[156],"over":[158],"1K":[159],"fine-grained":[160],"topics":[161],"organized":[162],"via":[163],"model-generated":[165],"hierarchical":[166],"taxonomy;":[167],"employs":[171],"fully":[173],"automated,":[174],"evaluation":[176],"pipeline":[177],"that":[178],"uses":[179],"strong":[180,209],"models":[182,237],"cross-validate":[184],"both":[185],"problem":[186],"validity":[187],"answer":[189],"correctness.":[190],"We":[191],"use":[192],"post-train":[195],"4B":[197],"Qwen3":[198],"model.":[199],"Despite":[200],"dataset's":[202],"modest":[203],"size,":[204],"resulting":[206],"model":[207],"achieves":[208],"performance":[210,233],"suite":[213],"challenging":[215],"benchmarks,":[217],"including":[218],"GPQA-Diamond,":[219],"AIME":[220],"24/25/26,":[221],"HMMT":[222],"25,":[223],"Humanity's":[225],"Last":[226],"Exam,":[227],"approaching":[228],"matching":[230],"substantially":[235],"larger":[236],"such":[238],"DeepSeek-R1":[240],"Qwen3-235B.":[242]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-04T00:00:00"}
