{"id":"https://openalex.org/W7105670201","doi":"https://doi.org/10.1109/tlt.2025.3630117","title":"Benchmarking In-Context Learning Strategies of Large Language Models for Math Reasoning Tasks","display_name":"Benchmarking In-Context Learning Strategies of Large Language Models for Math Reasoning Tasks","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7105670201","doi":"https://doi.org/10.1109/tlt.2025.3630117"},"language":null,"primary_location":{"id":"doi:10.1109/tlt.2025.3630117","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tlt.2025.3630117","pdf_url":null,"source":{"id":"https://openalex.org/S130363450","display_name":"IEEE Transactions on Learning Technologies","issn_l":"1939-1382","issn":["1939-1382","2372-0050"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Learning Technologies","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/tlt.2025.3630117","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yao Rong","orcid":"https://orcid.org/0000-0002-6031-3741"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Yao Rong","raw_affiliation_strings":["Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kathrin Se\u00dfler","orcid":"https://orcid.org/0000-0002-3380-4641"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Kathrin Se\u00dfler","raw_affiliation_strings":["Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Emek G\u00f6zl\u00fckl\u00fc","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Emek G\u00f6zl\u00fckl\u00fc","raw_affiliation_strings":["Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":null,"display_name":"Enkelejda Kasneci","orcid":"https://orcid.org/0000-0003-3146-4484"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Enkelejda Kasneci","raw_affiliation_strings":["Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79362261,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":null,"first_page":"1074","last_page":"1082"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.13210000097751617,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.13210000097751617,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.12929999828338623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11400000005960464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8611000180244446},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.736299991607666},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5922999978065491},{"id":"https://openalex.org/keywords/cornerstone","display_name":"Cornerstone","score":0.5388000011444092},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.43810001015663147},{"id":"https://openalex.org/keywords/mathematical-model","display_name":"Mathematical model","score":0.4083000123500824},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.40299999713897705}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8611000180244446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77920001745224},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.736299991607666},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5922999978065491},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5496000051498413},{"id":"https://openalex.org/C2780616401","wikidata":"https://www.wikidata.org/wiki/Q1133673","display_name":"Cornerstone","level":2,"score":0.5388000011444092},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4860000014305115},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.43810001015663147},{"id":"https://openalex.org/C76969082","wikidata":"https://www.wikidata.org/wiki/Q486902","display_name":"Mathematical model","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.40299999713897705},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3831999897956848},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.29829999804496765},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C2779193601","wikidata":"https://www.wikidata.org/wiki/Q20026918","display_name":"Mathematical theory","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25929999351501465},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tlt.2025.3630117","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tlt.2025.3630117","pdf_url":null,"source":{"id":"https://openalex.org/S130363450","display_name":"IEEE Transactions on Learning Technologies","issn_l":"1939-1382","issn":["1939-1382","2372-0050"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Learning Technologies","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/tlt.2025.3630117","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tlt.2025.3630117","pdf_url":null,"source":{"id":"https://openalex.org/S130363450","display_name":"IEEE Transactions on Learning Technologies","issn_l":"1939-1382","issn":["1939-1382","2372-0050"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Learning Technologies","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4706546664237976,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2962800603","https://openalex.org/W4323655724","https://openalex.org/W4385245566","https://openalex.org/W4385569771","https://openalex.org/W4385570391","https://openalex.org/W4385571689","https://openalex.org/W4385572162","https://openalex.org/W4389524021","https://openalex.org/W4389615177","https://openalex.org/W4391494845","https://openalex.org/W4399281666","https://openalex.org/W4401042689","https://openalex.org/W4411638692"],"related_works":[],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,14,20,40,51,60,128,186],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"in":[7,34,72,189],"mathematical":[8,55,100,107,131,147],"reasoning":[9,148],"has":[10],"become":[11],"a":[12,89],"cornerstone":[13],"related":[15],"research,":[16],"demonstrating":[17],"the":[18,38,49,58,118,125,151,159,165,168,173,184],"intelligence":[19],"these":[21],"models":[22,139,158,188],"and":[23,42,78,122,142],"enabling":[24],"potential":[25],"practical":[26,126],"applications":[27,127],"through":[28],"their":[29],"advanced":[30],"performance,":[31,123],"such":[32],"as":[33],"educational":[35,82],"settings.":[36],"Despite":[37],"variety":[39],"datasets":[41,65,108],"in-context":[43,96,160],"learning":[44,97,161],"algorithms":[45,74,98],"designed":[46],"to":[47,53,69,182],"improve":[48],"ability":[50],"LLMs":[52,129],"automate":[54],"problem":[56,101],"solving,":[57],"lack":[59],"comprehensive":[61],"benchmarking":[62],"across":[63,103],"different":[64],"makes":[66],"it":[67],"difficult":[68],"determine":[70],"which":[71],"context":[73],"are":[75],"effective,":[76],"efficient,":[77],"suitable":[79],"for":[80,99,130,156],"specific":[81],"applications.":[83],"In":[84],"this":[85],"project,":[86],"we":[87,116],"present":[88],"benchmark":[90,180],"that":[91,136],"fairly":[92],"compares":[93],"seven":[94],"state-of-the-art":[95],"solving":[102],"five":[104],"widely":[105],"used":[106],"on":[109,172],"four":[110],"powerful":[111],"foundation":[112,138,175],"models.":[113],"Beyond":[114],"accuracy,":[115],"explore":[117],"trade-off":[119],"between":[120],"efficiency":[121],"highlighting":[124],"reasoning.":[132],"Our":[133],"results":[134],"indicate":[135],"larger":[137],"like":[140],"GPT-4o,":[141],"LLaMA":[143],"3-70B":[144],"can":[145],"solve":[146],"independently":[149],"from":[150],"concrete":[152],"prompting":[153],"strategy,":[154],"while":[155],"smaller":[157],"approach":[162],"significantly":[163],"influences":[164],"performance.":[166],"Moreover,":[167],"optimal":[169],"prompt":[170],"depends":[171],"chosen":[174],"model.":[176],"We":[177],"open-source":[178],"our":[179],"code":[181],"support":[183],"integration":[185],"additional":[187],"future":[190],"research.":[191]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-11-14T00:00:00"}
