{"id":"https://openalex.org/W4415542893","doi":"https://doi.org/10.1145/3773083","title":"FineMath: A Fine-Grained Mathematical Evaluation Benchmark for Chinese Large Language Models","display_name":"FineMath: A Fine-Grained Mathematical Evaluation Benchmark for Chinese Large Language Models","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415542893","doi":"https://doi.org/10.1145/3773083"},"language":"en","primary_location":{"id":"doi:10.1145/3773083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3773083","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yan Liu","orcid":"https://orcid.org/0009-0008-2283-1725"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yan Liu","raw_affiliation_strings":["School of Computer Science and Technology/ TJUNLP Lab, Tianjin University"],"raw_orcid":"https://orcid.org/0009-0008-2283-1725","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology/ TJUNLP Lab, Tianjin University","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074106607","display_name":"Renren Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renren Jin","raw_affiliation_strings":["School of Computer Science and Technology/ TJUNLP Lab, Tianjin University"],"raw_orcid":"https://orcid.org/0009-0009-7452-9883","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology/ TJUNLP Lab, Tianjin University","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100427533","display_name":"Ling Shi","orcid":"https://orcid.org/0009-0003-5406-8967"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ling Shi","raw_affiliation_strings":["School of Computer Science and Technology/ TJUNLP Lab, Tianjin University"],"raw_orcid":"https://orcid.org/0009-0003-5406-8967","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology/ TJUNLP Lab, Tianjin University","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109964204","display_name":"Zheng Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zheng Yao","raw_affiliation_strings":["The University of Queensland"],"raw_orcid":"https://orcid.org/0009-0006-9007-3976","affiliations":[{"raw_affiliation_string":"The University of Queensland","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055232825","display_name":"Deyi Xiong","orcid":"https://orcid.org/0000-0002-2353-5038"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deyi Xiong","raw_affiliation_strings":["School of Computer Science and Technology/ TJUNLP Lab, Tianjin University"],"raw_orcid":"https://orcid.org/0000-0002-2353-5038","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology/ TJUNLP Lab, Tianjin University","institution_ids":["https://openalex.org/I136765683"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I136765683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14956307,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"12","first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9559000134468079,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6601999998092651},{"id":"https://openalex.org/keywords/mathematical-model","display_name":"Mathematical model","score":0.5580000281333923},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5109999775886536},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5047000050544739},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.42160001397132874},{"id":"https://openalex.org/keywords/mathematical-structure","display_name":"Mathematical structure","score":0.41609999537467957},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.39329999685287476},{"id":"https://openalex.org/keywords/mathematical-theory","display_name":"Mathematical theory","score":0.3684999942779541}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6601999998092651},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.65420001745224},{"id":"https://openalex.org/C76969082","wikidata":"https://www.wikidata.org/wiki/Q486902","display_name":"Mathematical model","level":2,"score":0.5580000281333923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5364000201225281},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5109999775886536},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5047000050544739},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.42160001397132874},{"id":"https://openalex.org/C130327152","wikidata":"https://www.wikidata.org/wiki/Q748349","display_name":"Mathematical structure","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C2779193601","wikidata":"https://www.wikidata.org/wiki/Q20026918","display_name":"Mathematical theory","level":2,"score":0.3684999942779541},{"id":"https://openalex.org/C47884741","wikidata":"https://www.wikidata.org/wiki/Q1166618","display_name":"Mathematical logic","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C66783780","wikidata":"https://www.wikidata.org/wiki/Q1166625","display_name":"Mathematical problem","level":2,"score":0.35899999737739563},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35199999809265137},{"id":"https://openalex.org/C172347087","wikidata":"https://www.wikidata.org/wiki/Q3217196","display_name":"Language of mathematics","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3319000005722046},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.33000001311302185},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3068000078201294},{"id":"https://openalex.org/C2776299755","wikidata":"https://www.wikidata.org/wiki/Q432449","display_name":"Carry (investment)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.25940001010894775}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3773083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3773083","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2105717194","https://openalex.org/W2251935656","https://openalex.org/W2276364082","https://openalex.org/W2962800603","https://openalex.org/W3034643750","https://openalex.org/W3170403598","https://openalex.org/W3204510023","https://openalex.org/W4412887812"],"related_works":[],"abstract_inverted_index":{"To":[0],"thoroughly":[1],"assess":[2],"the":[3,56,87,103,152,167],"mathematical":[4,21,24,43,59,81,138,175],"reasoning":[5,82,106,139,176],"abilities":[6,83],"of":[7,32,73,80,84,90,105,121,137,141,173],"Large":[8],"Language":[9],"Models":[10],"(LLMs),":[11],"we":[12,35],"need":[13],"to":[14,54,102,109],"carefully":[15],"curate":[16],"evaluation":[17,44,153],"datasets":[18],"covering":[19],"diverse":[20],"concepts":[22,60],"and":[23,125,155,170],"problems":[25,93],"at":[26,182],"different":[27],"difficulty":[28,99],"levels.":[29],"In":[30],"pursuit":[31],"this":[33,39],"objective,":[34],"propose":[36],"FineMath":[37,51,124],"in":[38,62,135],"article,":[40],"a":[41,118],"fine-grained":[42],"benchmark":[45],"dataset":[46],"for":[47,133],"assessing":[48],"Chinese":[49,142],"LLMs.":[50,85,143],"is":[52,129,180],"created":[53],"cover":[55],"major":[57],"key":[58],"taught":[61],"elementary":[63],"school":[64],"math,":[65],"which":[66],"are":[67,94],"further":[68],"divided":[69],"into":[70],"17":[71,88],"categories":[72,89],"math":[74,91],"word":[75,92],"problems,":[76],"enabling":[77],"in-depth":[78,149],"analysis":[79,150],"All":[86],"manually":[95],"annotated":[96],"with":[97],"their":[98,174],"levels":[100],"according":[101],"number":[104],"steps":[107],"required":[108],"solve":[110],"these":[111],"problems.":[112],"We":[113,144],"conduct":[114],"extensive":[115],"experiments":[116],"on":[117,123,151],"wide":[119],"range":[120],"LLMs":[122],"find":[126],"that":[127,157],"there":[128],"still":[130],"considerable":[131],"room":[132],"improvements":[134],"terms":[136],"capability":[140],"also":[145],"carry":[146],"out":[147],"an":[148],"process":[154],"methods":[156],"have":[158],"been":[159],"overlooked":[160],"previously.":[161],"These":[162],"two":[163],"factors":[164],"significantly":[165],"influence":[166],"model":[168],"results":[169],"our":[171],"understanding":[172],"capabilities.":[177],"Our":[178],"data":[179],"available":[181],"https://github.com/tjunlp-lab/FineMATH":[183],".":[184]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-25T00:00:00"}
