{"id":"https://openalex.org/W4402427449","doi":"https://doi.org/10.1145/3627673.3679122","title":"Infinity <scp>Math:</scp> A Scalable Instruction Tuning Dataset in Programmatic Mathematical Reasoning","display_name":"Infinity <scp>Math:</scp> A Scalable Instruction Tuning Dataset in Programmatic Mathematical Reasoning","publication_year":2024,"publication_date":"2024-10-20","ids":{"openalex":"https://openalex.org/W4402427449","doi":"https://doi.org/10.1145/3627673.3679122"},"language":"en","primary_location":{"id":"doi:10.1145/3627673.3679122","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627673.3679122","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2408.07089","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080153481","display_name":"Bo-Wen Zhang","orcid":"https://orcid.org/0000-0002-6384-2104"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bo-Wen Zhang","raw_affiliation_strings":["Beijing Academy of Artificial Intelligence, Beijing, China","China University of Mining & Technology Beijing Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6384-2104","affiliations":[{"raw_affiliation_string":"Beijing Academy of Artificial Intelligence, Beijing, China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"China University of Mining & Technology Beijing Beijing, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100395042","display_name":"Yan Yan","orcid":"https://orcid.org/0000-0002-0187-7010"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Yan","raw_affiliation_strings":["China University of Mining &amp; Technology Beijing, Beijing, China","Beijing Academy of Artificial Intelligence Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0187-7010","affiliations":[{"raw_affiliation_string":"China University of Mining &amp; Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"Beijing Academy of Artificial Intelligence Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lin Li","orcid":"https://orcid.org/0009-0009-9374-2460"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["China University of Mining &amp; Technology Beijing, Beijing, China","Beijing Academy of Artificial Intelligence Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-9374-2460","affiliations":[{"raw_affiliation_string":"China University of Mining &amp; Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"Beijing Academy of Artificial Intelligence Beijing, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101545733","display_name":"Guang Liu","orcid":"https://orcid.org/0000-0002-5259-7094"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Liu","raw_affiliation_strings":["Beijing Academy of Artificial Intelligence, Beijing, China","China University of Mining & Technology Beijing Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5259-7094","affiliations":[{"raw_affiliation_string":"Beijing Academy of Artificial Intelligence, Beijing, China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"China University of Mining & Technology Beijing Beijing, China","institution_ids":["https://openalex.org/I25757504"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080153481"],"corresponding_institution_ids":["https://openalex.org/I25757504","https://openalex.org/I4210100255"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25448319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5405","last_page":"5409"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13400","display_name":"Mathematical and Theoretical Analysis","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/2610","display_name":"Mathematical Physics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13400","display_name":"Mathematical and Theoretical Analysis","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/2610","display_name":"Mathematical Physics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.940500020980835,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9308000206947327,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/infinity","display_name":"Infinity","score":0.7527156472206116},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6383417844772339},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5777170658111572},{"id":"https://openalex.org/keywords/math-education","display_name":"Math education","score":0.4173915982246399},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.375276654958725},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.3442305326461792},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28618013858795166}],"concepts":[{"id":"https://openalex.org/C7321624","wikidata":"https://www.wikidata.org/wiki/Q205","display_name":"Infinity","level":2,"score":0.7527156472206116},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6383417844772339},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5777170658111572},{"id":"https://openalex.org/C2984408293","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Math education","level":2,"score":0.4173915982246399},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.375276654958725},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.3442305326461792},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28618013858795166},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3627673.3679122","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627673.3679122","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2408.07089","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.07089","pdf_url":"https://arxiv.org/pdf/2408.07089","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2408.07089","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.07089","pdf_url":"https://arxiv.org/pdf/2408.07089","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5799999833106995,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402427449.pdf","grobid_xml":"https://content.openalex.org/works/W4402427449.grobid-xml"},"referenced_works_count":3,"referenced_works":["https://openalex.org/W2475046758","https://openalex.org/W4389520703","https://openalex.org/W4393160048"],"related_works":["https://openalex.org/W2306553012","https://openalex.org/W4251417359","https://openalex.org/W4229993731","https://openalex.org/W2381059884","https://openalex.org/W2372756075","https://openalex.org/W1979193983","https://openalex.org/W2998581040","https://openalex.org/W2220405813","https://openalex.org/W1964052846","https://openalex.org/W2121993451"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2],"Chain-of-Thoughts":[3],"(CoT)":[4],"and":[5,37,76,91,97,115,134,156],"Program-of-Thoughts":[6],"(PoT)":[7],"methods":[8,28],"have":[9],"greatly":[10],"enhanced":[11,139],"language":[12,90],"models'":[13],"mathematical":[14,59,68,163],"reasoning":[15],"capabilities,":[16],"facilitating":[17],"their":[18],"integration":[19],"into":[20],"instruction":[21,54],"tuning":[22,55],"datasets":[23],"with":[24,88,144],"LLMs.":[25],"However,":[26],"existing":[27],"for":[29,41,47,57],"large-scale":[30],"dataset":[31,56],"creation":[32],"require":[33],"substantial":[34],"seed":[35],"data":[36,42,166],"high":[38,129],"computational":[39],"costs":[40],"synthesis,":[43],"posing":[44],"significant":[45,109],"challenges":[46],"scalability.":[48],"We":[49],"introduce":[50],"InfinityMATH,":[51],"a":[52,159],"scalable":[53],"programmatic":[58],"reasoning.":[60],"The":[61,165],"construction":[62],"pipeline":[63],"emphasizes":[64],"decoupling":[65],"numbers":[66],"from":[67,119],"problems":[69],"to":[70,121],"synthesize":[71],"number-independent":[72],"programs,":[73],"enabling":[74],"efficient":[75],"flexible":[77],"scaling":[78],"while":[79],"minimizing":[80],"dependency":[81],"on":[82,112,123,131],"specific":[83],"numerical":[84],"values.":[85],"Fine-tuning":[86],"experiments":[87],"open-source":[89],"code":[92],"models,":[93,107],"such":[94],"as":[95],"Llama2":[96],"CodeLlama,":[98],"demonstrate":[99],"the":[100,132,146],"practical":[101],"benefits":[102],"of":[103,141,162],"InfinityMATH.":[104],"These":[105],"fine-tuned":[106],"showed":[108],"relative":[110],"improvements":[111],"both":[113],"in-domain":[114],"out-of-domain":[116],"benchmarks,":[117,136],"ranging":[118],"184.7%":[120],"514.3%":[122],"average.":[124],"Additionally,":[125],"these":[126],"models":[127,152],"exhibited":[128],"robustness":[130],"GSM8K+":[133],"MATH+":[135],"which":[137],"are":[138,153],"version":[140],"test":[142],"sets":[143],"simply":[145],"number":[147],"variations.":[148],"InfinityMATH":[149],"ensures":[150],"that":[151],"more":[154],"versatile":[155],"effective":[157],"across":[158],"broader":[160],"range":[161],"problems.":[164],"is":[167],"available":[168],"at":[169],"https://huggingface.co/datasets/flagopen/InfinityMATH.":[170]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2024-09-11T00:00:00"}
