{"id":"https://openalex.org/W7155208480","doi":"https://doi.org/10.48550/arxiv.2604.18936","title":"Fine-Tuning Small Reasoning Models for Quantum Field Theory","display_name":"Fine-Tuning Small Reasoning Models for Quantum Field Theory","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155208480","doi":"https://doi.org/10.48550/arxiv.2604.18936"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111655620","display_name":"Nathaniel Woodward","orcid":"https://orcid.org/0000-0002-8051-7397"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Woodward, Nathaniel S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134311536","display_name":"Zhiqi Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Zhiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134236996","display_name":"Yurii Kvasiuk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kvasiuk, Yurii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134339653","display_name":"Kendrick M. Smith","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smith, Kendrick M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134260250","display_name":"Frederic Sala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sala, Frederic","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134226936","display_name":"Moritz M\u00fcnchmeyer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u00fcnchmeyer, Moritz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11804","display_name":"Quantum many-body systems","score":0.16410000622272491,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11804","display_name":"Quantum many-body systems","score":0.16410000622272491,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.0617000013589859,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.6740000247955322},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.608299970626831},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5109999775886536},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5008999705314636},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.47940000891685486},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4765999913215637},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.373199999332428}],"concepts":[{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.6740000247955322},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.608299970626831},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6017000079154968},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5117999911308289},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5109999775886536},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5008999705314636},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.47940000891685486},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4765999913215637},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3675999939441681},{"id":"https://openalex.org/C116222747","wikidata":"https://www.wikidata.org/wiki/Q220888","display_name":"Falsifiability","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31949999928474426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29429998993873596},{"id":"https://openalex.org/C189474733","wikidata":"https://www.wikidata.org/wiki/Q917912","display_name":"Model building","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C115047598","wikidata":"https://www.wikidata.org/wiki/Q54505","display_name":"Quantum field theory","level":2,"score":0.2606000006198883}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"the":[1,33],"growing":[2],"application":[3],"of":[4,38,101,138,169],"Large":[5],"Language":[6],"Models":[7],"(LLMs)":[8],"to":[9,45,54,129,145],"theoretical":[10,46],"physics,":[11],"there":[12],"is":[13,58],"little":[14],"academic":[15,35],"exploration":[16],"into":[17],"how":[18,147],"domain-specific":[19],"physics":[20,131],"reasoning":[21,41,148,171],"ability":[22],"develops":[23],"while":[24],"training":[25,51,164],"these":[26],"models.":[27],"To":[28],"investigate":[29],"this,":[30],"we":[31,60,91,156],"perform":[32,134],"first":[34],"fine-tuning":[36],"study":[37],"small":[39],"(7B-parameter)":[40],"models":[42],"dedicated":[43],"specifically":[44],"physics.":[47],"Because":[48],"open-source":[49],"verifiable":[50,162],"data":[52,64,160],"required":[53],"train":[55],"such":[56],"capabilities":[57],"scarce,":[59],"developed":[61],"a":[62,98],"robust":[63],"generation":[65],"pipeline":[66],"that":[67],"can":[68],"both":[69,113],"create":[70],"synthetic":[71,95],"problems":[72,77,96,103],"and":[73,107,117,142,153,166],"make":[74],"existing":[75],"human-authored":[76],"suitable":[78],"for":[79],"model":[80,139],"training.":[81],"Selecting":[82],"Quantum":[83],"Field":[84],"Theory":[85],"(QFT)":[86],"as":[87,125,127],"our":[88,159],"primary":[89],"domain,":[90],"generated":[92],"over":[93],"2,500":[94],"alongside":[97],"curated":[99],"collection":[100],"human-adapted":[102],"sourced":[104],"from":[105],"arXiv":[106],"standard":[108],"pedagogical":[109],"resources.":[110],"We":[111,133],"conduct":[112],"Reinforcement":[114],"Learning":[115],"(RL)":[116],"Supervised":[118],"Fine-Tuning":[119],"(SFT)":[120],"experiments,":[121],"benchmarking":[122],"performance":[123],"gains":[124],"well":[126],"generalization":[128],"other":[130],"domains.":[132],"an":[135],"extensive":[136],"analysis":[137],"chains-of-though":[140],"before":[141],"after":[143],"fine-tuning,":[144],"understand":[146],"errors":[149],"evolve":[150],"during":[151],"RL":[152],"SFT.":[154],"Finally,":[155],"publicly":[157],"release":[158],"pipeline,":[161],"QFT":[163,170],"data,":[165],"$\\sim$200M":[167],"tokens":[168],"traces.":[172]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-23T00:00:00"}
