{"id":"https://openalex.org/W4394973321","doi":"https://doi.org/10.48550/arxiv.2404.11891","title":"Large Language Models Can Solve Real-World Planning Rigorously with Formal Verification Tools","display_name":"Large Language Models Can Solve Real-World Planning Rigorously with Formal Verification Tools","publication_year":2024,"publication_date":"2024-04-18","ids":{"openalex":"https://openalex.org/W4394973321","doi":"https://doi.org/10.48550/arxiv.2404.11891"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2404.11891","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.11891","pdf_url":"https://arxiv.org/pdf/2404.11891","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.11891","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113034869","display_name":"Yilun Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hao, Yilun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115580462","display_name":"Yongchao Chen","orcid":"https://orcid.org/0000-0001-7792-0989"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yongchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102363214","display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0003-4696-9119"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5019603699","display_name":"Chuchu Fan","orcid":"https://orcid.org/0000-0003-4671-233X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Chuchu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113034869"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.95169997215271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.95169997215271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.916100025177002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/plan","display_name":"Plan (archaeology)","score":0.615423321723938},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.578336775302887},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.43700164556503296},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.42215168476104736},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.36019736528396606},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.12484118342399597},{"id":"https://openalex.org/keywords/archaeology","display_name":"Archaeology","score":0.09839510917663574}],"concepts":[{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.615423321723938},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.578336775302887},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.43700164556503296},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.42215168476104736},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.36019736528396606},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.12484118342399597},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.09839510917663574}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2404.11891","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.11891","pdf_url":"https://arxiv.org/pdf/2404.11891","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2404.11891","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2404.11891","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.11891","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.11891","pdf_url":"https://arxiv.org/pdf/2404.11891","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4720003262","display_name":null,"funder_award_id":"N00014-22","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394973321.pdf","grobid_xml":"https://content.openalex.org/works/W4394973321.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2488581006","https://openalex.org/W1587246512","https://openalex.org/W2913714777","https://openalex.org/W61995002"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"struggle":[4],"to":[5,141],"directly":[6],"generate":[7],"correct":[8],"plans":[9,47],"for":[10,177],"complex":[11,73],"multi-constraint":[12,74],"planning":[13,26,67,75],"problems,":[14,80],"even":[15],"with":[16,48,93,114,191],"self-verification":[17],"and":[18,71,87,100,111,138,163,175,182,189,202],"self-critique.":[19],"For":[20],"example,":[21],"a":[22,49,106],"U.S.":[23],"domestic":[24],"travel":[25,46,136],"benchmark":[27],"TravelPlanner":[28,94],"was":[29],"proposed":[30],"in":[31,130],"Xie":[32],"et":[33],"al.":[34],"(2024),":[35],"where":[36],"the":[37,96,157],"best":[38],"LLM":[39],"OpenAI":[40],"o1-preview":[41],"can":[42,155,173],"only":[43],"find":[44],"viable":[45],"10%":[50],"success":[51,107],"rate":[52,108],"given":[53],"all":[54,194],"needed":[55],"information.":[56],"In":[57],"this":[58,62],"work,":[59],"we":[60],"tackle":[61],"by":[63,85],"proposing":[64],"an":[65,178],"LLM-based":[66],"framework":[68,104,121,154,172,199],"that":[69,102,170,193],"formalizes":[70],"solves":[72],"problems":[76],"as":[77,95],"constrained":[78],"satisfiability":[79,89],"which":[81],"are":[82,151,200],"further":[83],"consumed":[84],"sound":[86],"complete":[88],"solvers.":[90],"We":[91,168],"start":[92],"primary":[97],"use":[98],"case":[99],"show":[101,169],"our":[103,120,131,153,171,198],"achieves":[105],"of":[109,180,197],"93.9%":[110],"is":[112],"effective":[113,201],"diverse":[115],"paraphrased":[116],"prompts.":[117],"More":[118],"importantly,":[119],"has":[122],"strong":[123],"zero-shot":[124],"generalizability,":[125],"successfully":[126],"handling":[127],"unseen":[128,134],"constraints":[129],"newly":[132],"created":[133],"international":[135],"dataset":[137],"generalizing":[139],"well":[140],"new":[142],"fundamentally":[143],"different":[144],"domains.":[145],"Moreover,":[146],"when":[147],"user":[148],"input":[149],"queries":[150,185],"infeasible,":[152],"identify":[156],"unsatisfiable":[158,184],"core,":[159],"provide":[160],"failure":[161],"reasons,":[162],"offers":[164],"personalized":[165],"modification":[166],"suggestions.":[167],"modify":[174],"solve":[176],"average":[179],"81.6%":[181],"91.7%":[183],"from":[186],"two":[187],"datasets":[188],"prove":[190],"ablations":[192],"key":[195],"components":[196],"necessary.":[203],"Project":[204],"page:":[205],"https://sites.google.com/view/llm-rwplanning.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2024-04-20T00:00:00"}
