{"id":"https://openalex.org/W6925638580","doi":"https://doi.org/10.18420/se2025-ws-12","title":"On the logical (in)consistency of code-generating LLMs","display_name":"On the logical (in)consistency of code-generating LLMs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W6925638580","doi":"https://doi.org/10.18420/se2025-ws-12"},"language":"en","primary_location":{"id":"doi:10.18420/se2025-ws-12","is_oa":true,"landing_page_url":"https://doi.org/10.18420/se2025-ws-12","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.18420/se2025-ws-12","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dong, Ke","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dong, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hsu, William","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hsu, William","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hitzler, Pascal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hitzler, Pascal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Vasserman, Eugene Y.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasserman, Eugene Y.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41931843,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10032","display_name":"Marine and coastal ecosystems","score":0.2964000105857849,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10032","display_name":"Marine and coastal ecosystems","score":0.2964000105857849,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10255","display_name":"Oceanographic and Atmospheric Processes","score":0.14869999885559082,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12142","display_name":"Marine Invertebrate Physiology and Ecology","score":0.10440000146627426,"subfield":{"id":"https://openalex.org/subfields/1911","display_name":"Paleontology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.67330002784729},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6517999768257141},{"id":"https://openalex.org/keywords/odds","display_name":"Odds","score":0.49889999628067017},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.477400004863739},{"id":"https://openalex.org/keywords/logical-consequence","display_name":"Logical consequence","score":0.4620000123977661},{"id":"https://openalex.org/keywords/causal-consistency","display_name":"Causal consistency","score":0.4462999999523163},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4422000050544739},{"id":"https://openalex.org/keywords/logical-conjunction","display_name":"Logical conjunction","score":0.43070000410079956}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.67330002784729},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6517999768257141},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5978999733924866},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5460000038146973},{"id":"https://openalex.org/C143095724","wikidata":"https://www.wikidata.org/wiki/Q515895","display_name":"Odds","level":3,"score":0.49889999628067017},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.477400004863739},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.4620000123977661},{"id":"https://openalex.org/C175652121","wikidata":"https://www.wikidata.org/wiki/Q4379351","display_name":"Causal consistency","level":5,"score":0.4462999999523163},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4422000050544739},{"id":"https://openalex.org/C21847791","wikidata":"https://www.wikidata.org/wiki/Q191081","display_name":"Logical conjunction","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31209999322891235},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3059000074863434},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2824000120162964},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C82029504","wikidata":"https://www.wikidata.org/wiki/Q4373882","display_name":"Sequential consistency","level":4,"score":0.26919999718666077},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18420/se2025-ws-12","is_oa":true,"landing_page_url":"https://doi.org/10.18420/se2025-ws-12","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18420/se2025-ws-12","is_oa":true,"landing_page_url":"https://doi.org/10.18420/se2025-ws-12","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"generative":[3,189],"Large":[4],"Language":[5],"Model":[6],"(LLM)-based":[7],"programming":[8,54],"assistants":[9],"have":[10],"soared":[11],"in":[12,36,117,142,192,204],"popularity.":[13],"Owing":[14],"to":[15],"the":[16,43,65,79,99,137,143,162,195],"black-box":[17],"nature":[18],"of":[19,45,53,57,68,81,139,181,198],"Deep":[20],"Neural":[21],"Networks":[22],"on":[23,85,90,149],"which":[24],"they":[25],"are":[26,114],"based,":[27],"there":[28,113],"is":[29,71,173],"ongoing":[30],"concern":[31],"about":[32],"potential":[33],"correctness":[34],"issues":[35],"generated":[37],"code.":[38],"Recent":[39],"work":[40,59],"has":[41,60,145],"analyzed":[42],"reliability":[44],"LLMs":[46,84],"as":[47],"code":[48,70,96,124,160,183],"generators":[49],"for":[50,123],"a":[51,146],"number":[52],"languages.":[55],"Most":[56],"that":[58,112,169],"examined":[61],"syntactic":[62],"correctness,":[63],"but":[64],"logical":[66,86,92,171,199],"consistency":[67,87,172],"syntactically-correct":[69],"another":[72],"critical":[73],"factor":[74],"affecting":[75],"correctness.":[76],"We":[77],"test":[78],"performance":[80],"five":[82],"light-weight":[83],"tasks":[88],"based":[89],"simple":[91,197],"inversion,":[93],"using":[94],"real-world":[95],"samples":[97],"from":[98],"CodeSearchNet":[100],"Python":[101],"dataset.":[102],"Using":[103],"Cohen\u2019s":[104],"d":[105],"and":[106,132,186,202],"Odds":[107],"Ratio":[108],"metrics,":[109],"we":[110,167],"show":[111],"significant":[115],"differences":[116],"whether":[118],"models,":[119],"even":[120,194],"those":[121],"designed":[122],"generation,":[125],"produce":[126],"\u201csensible\u201d":[127],"code,":[128],"i.e.,":[129],"syntactically":[130],"correct":[131],"also":[133],"logically":[134],"consistent.":[135],"Moreover,":[136],"amount":[138],"context":[140],"included":[141],"prompt":[144],"major":[147],"effect":[148],"model":[150],"performance:":[151],"models":[152],"perform":[153],"worse":[154],"when":[155],"given":[156],"comments":[157],"and/or":[158],"additional":[159],"around":[161],"function":[163],"being":[164],"tested.":[165],"Overall,":[166],"find":[168],"models\u2019":[170],"well-aligned":[174],"with":[175],"EvalPlus@1":[176],"scores":[177],"(a":[178],"popular":[179],"measure":[180],"LLM":[182],"generation":[184],"capability),":[185],"further":[187],"confirm":[188],"LLMs\u2019":[190],"shortcomings":[191],"handling":[193,205],"most":[196],"\u201creasoning\u201d":[200],"tasks,":[201],"challenges":[203],"contextual":[206],"information.":[207]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
