{"id":"https://openalex.org/W4416036874","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.149","title":"Unleashing the Reasoning Potential of LLMs by Critique Fine-Tuning on One Problem","display_name":"Unleashing the Reasoning Potential of LLMs by Critique Fine-Tuning on One Problem","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036874","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.149"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.149","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.149","pdf_url":"https://aclanthology.org/2025.emnlp-main.149.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.149.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100383181","display_name":"Yubo Wang","orcid":"https://orcid.org/0000-0001-8751-2233"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yubo Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053562090","display_name":"Ping Nie","orcid":"https://orcid.org/0000-0002-8640-8685"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ping Nie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120309142","display_name":"Kai Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kai Zou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114142693","display_name":"Lijun Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lijun Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018077929","display_name":"Wenjia Chen","orcid":"https://orcid.org/0000-0001-8201-7145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenhu Chen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100383181"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18451589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3017","last_page":"3027"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.4293000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.4293000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.10130000114440918,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0674000009894371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.3188000023365021},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.30730000138282776},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.2603999972343445}],"concepts":[{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.43709999322891235},{"id":"https://openalex.org/C190253527","wikidata":"https://www.wikidata.org/wiki/Q295354","display_name":"Law and economics","level":1,"score":0.37630000710487366},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.33629998564720154},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.3352999985218048},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.3312999904155731},{"id":"https://openalex.org/C118084267","wikidata":"https://www.wikidata.org/wiki/Q26110","display_name":"Positive economics","level":1,"score":0.32100000977516174},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C138921699","wikidata":"https://www.wikidata.org/wiki/Q47555","display_name":"Political economy","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C95124753","wikidata":"https://www.wikidata.org/wiki/Q875686","display_name":"Environmental ethics","level":1,"score":0.2734000086784363},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.2655999958515167},{"id":"https://openalex.org/C47768531","wikidata":"https://www.wikidata.org/wiki/Q1127188","display_name":"Development economics","level":1,"score":0.26109999418258667},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.149","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.149","pdf_url":"https://aclanthology.org/2025.emnlp-main.149.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.149","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.149","pdf_url":"https://aclanthology.org/2025.emnlp-main.149.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036874.pdf","grobid_xml":"https://content.openalex.org/works/W4416036874.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"have":[1,32],"witnessed":[2],"that":[3,34,90],"strong":[4],"LLMs":[5,122],"like":[6],"Qwen-Math,":[7],"MiMo,":[8],"and":[9,119,129,143,171,212],"Phi-4":[10],"possess":[11],"immense":[12],"reasoning":[13,29,49,79,102,150,176,218],"potential":[14,80,103],"inherited":[15],"from":[16,134,187],"the":[17,78,101,140,185,195,217],"pre-training":[18],"stage.With":[19],"reinforcement":[20],"learning":[21],"(RL),":[22],"these":[23,47,82],"models":[24],"can":[25,45,98],"improve":[26],"dramatically":[27],"on":[28,37,94,139,167,173],"tasks.Recent":[30],"studies":[31,193],"shown":[33],"even":[35,183],"RL":[36,51,60,188],"a":[38,67,72,116,209],"single":[39,117],"problem":[40,97,118],"(Wang":[41],"et":[42],"al.,":[43],"2025a)":[44],"unleash":[46,77,100],"models'":[48],"capabilities.However,":[50],"is":[52],"not":[53],"only":[54,95],"expensive":[55],"but":[56],"also":[57],"unstable.Even":[58],"one-shot":[59,198],"requires":[61],"hundreds":[62],"of":[63,81,104,158,165,197,220],"GPU":[64,156],"hours.This":[65],"raises":[66],"critical":[68],"question:":[69],"Is":[70],"there":[71],"more":[73],"efficient":[74],"way":[75],"to":[76,115,123,136,181,215],"powerful":[83],"base":[84],"LLMs?In":[85],"this":[86],"work,":[87],"we":[88],"demonstrate":[89],"Critique":[91],"Fine-Tuning":[92],"(CFT)":[93],"one":[96],"effectively":[99],"LLMs.Our":[105],"method":[106],"constructs":[107],"critique":[108],"data":[109,142],"by":[110],"collecting":[111],"diverse":[112,149],"model-generated":[113],"solutions":[114],"using":[120],"teacher":[121],"provide":[124],"detailed":[125],"critiques.We":[126],"finetune":[127],"Qwen":[128],"Llama":[130],"family":[131],"models,":[132],"ranging":[133],"1.5B":[135],"14B":[137],"parameters,":[138],"CFT":[141,199,207],"observe":[144],"significant":[145],"performance":[146],"gains":[147],"across":[148,200],"tasks.For":[151],"example,":[152],"with":[153,189],"just":[154],"5":[155],"hours":[157],"training,":[159],"Qwen-Math-7B-CFT":[160],"show":[161],"an":[162],"average":[163],"improvement":[164],"15%":[166],"six":[168],"math":[169],"benchmarks":[170],"16%":[172],"three":[174],"logic":[175],"benchmarks.These":[177],"results":[178,186,204],"are":[179],"comparable":[180],"or":[182],"surpass":[184],"20x":[190],"less":[191],"compute.Ablation":[192],"reveal":[194],"robustness":[196],"different":[201],"prompt":[202],"problems.These":[203],"highlight":[205],"oneshot":[206],"as":[208],"simple,":[210],"general,":[211],"computeefficient":[213],"approach":[214],"unleashing":[216],"capabilities":[219],"modern":[221],"LLMs.":[222]},"counts_by_year":[],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2025-11-08T00:00:00"}
