{"id":"https://openalex.org/W7151758105","doi":"https://doi.org/10.48550/arxiv.2604.05114","title":"$\u03c0^2$: Structure-Originated Reasoning Data Improves Long-Context Reasoning Ability of Large Language Models","display_name":"$\u03c0^2$: Structure-Originated Reasoning Data Improves Long-Context Reasoning Ability of Large Language Models","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151758105","doi":"https://doi.org/10.48550/arxiv.2604.05114"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05114","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05114","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05114","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133190046","display_name":"Quyet V. Do","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Do, Quyet V.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133231215","display_name":"Thinh Pham","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pham, Thinh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133221215","display_name":"Nguyen Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Nguyen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133235362","display_name":"Sha Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Sha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133186862","display_name":"Pratibha Zunjare","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zunjare, Pratibha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133154184","display_name":"Tu Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Tu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3978999853134155,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3978999853134155,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.218299999833107,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.039900001138448715,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.605400025844574},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.5613999962806702},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.5393000245094299},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5339999794960022},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.5175999999046326},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.4788999855518341},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.45590001344680786},{"id":"https://openalex.org/keywords/verbal-reasoning","display_name":"Verbal reasoning","score":0.4023999869823456}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7203999757766724},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.605400025844574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5914000272750854},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.545199990272522},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.5393000245094299},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5339999794960022},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.5175999999046326},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.4788999855518341},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.45590001344680786},{"id":"https://openalex.org/C36964233","wikidata":"https://www.wikidata.org/wiki/Q7920942","display_name":"Verbal reasoning","level":3,"score":0.4023999869823456},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3878999948501587},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.3813000023365021},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.35190001130104065},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C183521366","wikidata":"https://www.wikidata.org/wiki/Q7256422","display_name":"Psychology of reasoning","level":4,"score":0.27649998664855957},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2703000009059906},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.26669999957084656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05114","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05114","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05114","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05114","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.510267436504364,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"study":[1],"a":[2],"pipeline":[3],"that":[4],"curates":[5],"reasoning":[6,15,26,52,70,95,128],"data":[7,11,27],"from":[8,37,40],"initial":[9],"structured":[10,69],"for":[12],"improving":[13],"long-context":[14,94],"in":[16],"large":[17],"language":[18],"models":[19,137],"(LLMs).":[20],"Our":[21,133],"approach,":[22],"$\u03c0^2$,":[23],"constructs":[24],"high-quality":[25],"through":[28,61],"rigorous":[29],"QA":[30,75],"curation:":[31],"1)":[32],"extracting":[33],"and":[34,44,49,59,65,85,97,108,136],"expanding":[35],"tables":[36,43],"Wikipedia,":[38],"2)":[39],"the":[41],"collected":[42],"relevant":[45],"context,":[46],"generating":[47],"realistic":[48,78],"multi-hop":[50],"analytical":[51],"questions":[53],"whose":[54],"answers":[55],"are":[56,138],"automatically":[57],"determined":[58],"verified":[60],"dual-path":[62],"code":[63],"execution,":[64],"3)":[66],"back-translating":[67],"step-by-step":[68],"traces":[71],"as":[72],"solutions":[73],"of":[74,106],"pairs":[76],"given":[77],"web-search":[79],"context.":[80],"Supervised":[81],"fine-tuning":[82],"with":[83,101,125],"\\textsc{\\small{gpt-oss-20b}}":[84,117],"\\textsc{\\small{Qwen3-4B-Instruct-2507}}":[86],"on":[87],"$\u03c0^2$":[88],"yields":[89],"consistent":[90],"improvements":[91],"across":[92],"four":[93],"benchmarks":[96],"our":[98,112],"alike":[99],"$\u03c0^2$-Bench,":[100],"average":[102,121],"absolute":[103],"accuracy":[104],"gains":[105],"+4.3%":[107],"+2.7%":[109],"respectively.":[110],"Notably,":[111],"dataset":[113],"facilitates":[114],"self-distillation,":[115],"where":[116],"even":[118],"improves":[119],"its":[120,126],"performance":[122],"by":[123],"+4.4%":[124],"own":[127],"traces,":[129],"demonstrating":[130],"$\u03c0^2$'s":[131],"usefulness.":[132],"code,":[134],"data,":[135],"open-source":[139],"at":[140],"https://github.com/vt-pi-squared/pi-squared.":[141]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-09T00:00:00"}
