{"id":"https://openalex.org/W7150740589","doi":"https://doi.org/10.48550/arxiv.2604.02709","title":"Evaluating the Formal Reasoning Capabilities of Large Language Models through Chomsky Hierarchy","display_name":"Evaluating the Formal Reasoning Capabilities of Large Language Models through Chomsky Hierarchy","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150740589","doi":"https://doi.org/10.48550/arxiv.2604.02709"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.02709","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02709","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.02709","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133007327","display_name":"Yihong Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yihong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Xiao, Jianha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jianha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133041267","display_name":"Xue Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133011489","display_name":"Xuyuan Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Xuyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133040960","display_name":"Zhiyuan Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Zhiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101021419","display_name":"Jiaru Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Jiaru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133020720","display_name":"Kechi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kechi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133020000","display_name":"Jia Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133033244","display_name":"Zhi Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Zhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133015530","display_name":"Ge Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.5640000104904175,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.5640000104904175,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06459999829530716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.04520000144839287,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/formal-language","display_name":"Formal language","score":0.5185999870300293},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.5001000165939331},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.4830999970436096},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4659999907016754},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.3955000042915344},{"id":"https://openalex.org/keywords/chomsky-hierarchy","display_name":"Chomsky hierarchy","score":0.3801000118255615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6793000102043152},{"id":"https://openalex.org/C146072743","wikidata":"https://www.wikidata.org/wiki/Q192161","display_name":"Formal language","level":2,"score":0.5185999870300293},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.5001000165939331},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.4830999970436096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47429999709129333},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3955000042915344},{"id":"https://openalex.org/C92273848","wikidata":"https://www.wikidata.org/wiki/Q190913","display_name":"Chomsky hierarchy","level":3,"score":0.3801000118255615},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36070001125335693},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2597000002861023},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2581000030040741},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.02709","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02709","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.02709","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02709","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"formal":[1,33,51,215,246],"reasoning":[2,34,247],"capabilities":[3,121],"of":[4,50,73,108,112,138,223,229,240],"LLMs":[5,17,43,69,204,242],"are":[6,205],"crucial":[7],"for":[8,16,66,83,213],"advancing":[9],"automated":[10],"software":[11,231],"engineering.":[12],"However,":[13],"existing":[14],"benchmarks":[15],"lack":[18],"systematic":[19],"evaluation":[20,97],"based":[21],"on":[22],"computation":[23],"and":[24,101,115,155,164,233],"complexity,":[25],"leaving":[26],"a":[27,64,109,128,143],"critical":[28],"gap":[29],"in":[30],"understanding":[31],"their":[32],"capabilities.":[35,248],"Therefore,":[36],"it":[37],"is":[38,87,106],"still":[39],"unknown":[40],"whether":[41],"SOTA":[42],"can":[44],"grasp":[45],"the":[46,71,88,135,220,227,238],"structured,":[47],"hierarchical":[48],"complexity":[49,199],"languages":[52],"as":[53],"defined":[54],"by":[55],"Computation":[56],"Theory.":[57],"To":[58],"address":[59],"this,":[60],"we":[61,158],"introduce":[62],"ChomskyBench,":[63],"benchmark":[65],"systematically":[67],"evaluating":[68],"through":[70],"lens":[72],"Chomsky":[74,93],"Hierarchy.":[75],"Unlike":[76],"prior":[77],"work":[78],"that":[79,132,160,186,203],"uses":[80],"vectorized":[81],"classification":[82],"neural":[84],"networks,":[85],"ChomskyBench":[86,105],"first":[89],"to":[90,119,236],"combine":[91],"full":[92],"Hierarchy":[94],"coverage,":[95],"process-trace":[96],"via":[98],"natural":[99],"language,":[100],"deterministic":[102],"symbolic":[103],"verifiability.":[104],"composed":[107],"comprehensive":[110],"suite":[111],"language":[113],"recognition":[114],"generation":[116],"tasks":[117],"designed":[118],"test":[120],"at":[122],"each":[123],"level.":[124],"Extensive":[125],"experiments":[126],"indicate":[127],"clear":[129],"performance":[130],"stratification":[131],"correlates":[133],"with":[134,243],"hierarchy's":[136],"levels":[137],"complexity.":[139],"Our":[140],"analysis":[141,200],"reveals":[142],"direct":[144],"relationship":[145],"where":[146],"increasing":[147],"task":[148],"difficulty":[149],"substantially":[150],"impacts":[151],"both":[152],"inference":[153,166],"length":[154],"performance.":[156],"Furthermore,":[157],"find":[159],"while":[161],"larger":[162],"models":[163],"advanced":[165],"methods":[167],"offer":[168],"notable":[169],"relative":[170],"gains,":[171],"they":[172],"face":[173],"severe":[174],"efficiency":[175],"barriers:":[176],"achieving":[177],"practical":[178,221],"reliability":[179],"would":[180],"require":[181],"prohibitive":[182],"computational":[183],"costs,":[184],"revealing":[185],"current":[187,224],"limitations":[188],"stem":[189],"from":[190],"inefficiency":[191],"rather":[192],"than":[193,209],"absolute":[194],"capability":[195],"bounds.":[196],"A":[197],"time":[198],"further":[201],"indicates":[202],"significantly":[206],"less":[207],"efficient":[208],"traditional":[210,230],"algorithmic":[211],"programs":[212],"these":[214],"tasks.":[216],"These":[217],"results":[218],"delineate":[219],"limits":[222],"LLMs,":[225],"highlight":[226],"indispensability":[228],"tools,":[232],"provide":[234],"insights":[235],"guide":[237],"development":[239],"future":[241],"more":[244],"powerful":[245]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-07T00:00:00"}
