{"id":"https://openalex.org/W7161721716","doi":"https://doi.org/10.48550/arxiv.2605.16371","title":"GeoSym127K: Scalable Symbolically-verifiable Synthesis for Multimodal Geometric Reasoning","display_name":"GeoSym127K: Scalable Symbolically-verifiable Synthesis for Multimodal Geometric Reasoning","publication_year":2026,"publication_date":"2026-05-10","ids":{"openalex":"https://openalex.org/W7161721716","doi":"https://doi.org/10.48550/arxiv.2605.16371"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.16371","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16371","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.16371","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127394635","display_name":"Jinhao Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Jinhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136505172","display_name":"Zheng Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136495368","display_name":"Jinwei Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Jinwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098933159","display_name":"Qiannian ZhAO","orcid":"https://orcid.org/0009-0000-5385-8041"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Qiannian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136501395","display_name":"Shawn Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shawn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136456183","display_name":"Jing Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136467580","display_name":"Por Lip Yee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yee, Por Lip","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136475758","display_name":"Prayag Tiwari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tiwari, Prayag","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136502840","display_name":"Jingjing Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Jingjing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136489976","display_name":"Benyou Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Benyou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136465864","display_name":"Lewei Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Lewei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136481157","display_name":"Zhan Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Zhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.1932000070810318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.1932000070810318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11599999666213989,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.09650000184774399,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5967000126838684},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5514000058174133},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5024999976158142},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.46050000190734863},{"id":"https://openalex.org/keywords/on-the-fly","display_name":"On the fly","score":0.4120999872684479},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.3799000084400177},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.3709000051021576},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.36959999799728394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7583000063896179},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5514000058174133},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4945000112056732},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.46050000190734863},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.4120999872684479},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3862999975681305},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.3799000084400177},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.3709000051021576},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3628000020980835},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.35409998893737793},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.3402999937534332},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C51234621","wikidata":"https://www.wikidata.org/wiki/Q2149495","display_name":"Testability","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C164155591","wikidata":"https://www.wikidata.org/wiki/Q2067766","display_name":"Satisfiability modulo theories","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2881999909877777},{"id":"https://openalex.org/C73301696","wikidata":"https://www.wikidata.org/wiki/Q5469984","display_name":"Formalism (music)","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.16371","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16371","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.16371","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16371","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Multimodal":[1],"Models":[2],"(LMMs)":[3],"often":[4],"struggle":[5],"with":[6,55,80,157],"geometric":[7,63],"reasoning":[8,192],"due":[9],"to":[10,60],"visual":[11],"hallucinations":[12],"and":[13,32,41,52,84,119,135,146,195,201],"a":[14,38,56,71],"lack":[15],"of":[16,97,189],"mathematically":[17],"precise":[18],"Chain-of-Thought":[19],"(CoT)":[20],"data.":[21],"To":[22],"address":[23],"this,":[24],"we":[25,68,109],"propose":[26],"the":[27,131,172,185],"GeoSym":[28,112],"Engine,":[29],"an":[30,42,94,127],"automated":[31],"scalable":[33],"neuro-symbolic":[34],"framework.":[35],"By":[36],"leveraging":[37],"type-conditional":[39],"grammar":[40],"analytic":[43],"SymGT":[44],"Solver,":[45],"it":[46],"derives":[47],"exact":[48],"symbolic":[49,81],"ground":[50,82],"truths":[51],"seamlessly":[53],"integrates":[54],"robust":[57,186],"rendering":[58],"pipeline":[59],"produce":[61],"high-precision":[62],"diagrams.":[64],"Using":[65],"this":[66,183],"engine,":[67],"construct":[69],"GeoSym127K,":[70],"difficulty-stratified":[72],"dataset":[73],"featuring":[74],"51K":[75],"high-resolution":[76],"images,":[77],"127K":[78],"questions":[79],"truths,":[83],"55K":[85],"answer-verified":[86],"CoT":[87],"QA":[88],"pairs.":[89],"We":[90],"also":[91],"introduce":[92],"GeoSym-Bench,":[93],"expert-curated":[95],"suite":[96],"511":[98],"complex":[99],"samples":[100],"for":[101],"rigorous":[102],"evaluation.":[103],"Through":[104],"extensive":[105],"supervised":[106],"fine-tuning":[107],"(SFT),":[108],"demonstrate":[110],"that":[111,164],"drives":[113],"concentrated":[114],"improvements":[115],"specifically":[116],"on":[117,130,140],"diagram-dependent":[118],"multi-step":[120],"geometry":[121],"tasks.":[122],"Our":[123],"Qwen3-VL-8B":[124],"model":[125],"gains":[126],"absolute":[128],"+22.21%":[129],"MathVerse":[132],"Vision-Only":[133],"subset":[134],"reaches":[136],"61.52%":[137],"(+6.19%":[138],"improvement)":[139],"WeMath,":[141],"mitigating":[142],"long-horizon":[143],"logic":[144],"fragmentation":[145],"outperforming":[147],"advanced":[148],"closed-source":[149],"models":[150],"like":[151],"Doubao-1.8.":[152],"Furthermore,":[153],"applying":[154],"Reinforcement":[155],"Learning":[156],"Verifiable":[158],"Rewards":[159],"(RLVR)":[160],"via":[161],"GRPO":[162],"reveals":[163],"initializing":[165],"from":[166],"structural":[167],"SFT":[168],"checkpoints":[169],"substantially":[170],"elevates":[171],"performance":[173],"ceiling":[174],"over":[175],"zero-shot":[176],"RL.":[177],"Driven":[178],"by":[179],"deterministic":[180],"exact-match":[181],"signals,":[182],"showcases":[184],"scaling":[187],"potential":[188],"our":[190],"verifiable":[191],"synthesis.":[193],"Datasets":[194],"code":[196],"are":[197],"available":[198],"at":[199],"https://huggingface.co/datasets/Tomie0506/GeoSym127K":[200],"https://github.com/Tomie56/GeoSym127K.":[202]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
