{"id":"https://openalex.org/W4411449792","doi":"https://doi.org/10.1145/3715761","title":"Integrating Large Language Models and Reinforcement Learning for Non-linear Reasoning","display_name":"Integrating Large Language Models and Reinforcement Learning for Non-linear Reasoning","publication_year":2025,"publication_date":"2025-06-19","ids":{"openalex":"https://openalex.org/W4411449792","doi":"https://doi.org/10.1145/3715761"},"language":"en","primary_location":{"id":"doi:10.1145/3715761","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3715761","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020547989","display_name":"Yoav Alon","orcid":"https://orcid.org/0000-0003-1432-3057"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Yoav Alon","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-1432-3057","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072292925","display_name":"Cristina David","orcid":"https://orcid.org/0000-0002-9106-934X"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Cristina David","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-9106-934X","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5020547989"],"corresponding_institution_ids":["https://openalex.org/I36234482"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19606204,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"2","issue":"FSE","first_page":"957","last_page":"977"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7380017042160034},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6904039978981018},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5647324323654175},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5374942421913147},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.5030216574668884},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.48771509528160095},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4525661766529083},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4430376887321472},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4429779052734375},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11627593636512756}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7380017042160034},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6904039978981018},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5647324323654175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5374942421913147},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.5030216574668884},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.48771509528160095},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4525661766529083},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4430376887321472},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4429779052734375},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11627593636512756},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3715761","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3715761","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.6299999952316284,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1567525491","https://openalex.org/W1771925833","https://openalex.org/W2079055089","https://openalex.org/W2101105183","https://openalex.org/W2101832700","https://openalex.org/W2114479655","https://openalex.org/W2128888088","https://openalex.org/W2158439356","https://openalex.org/W2735643055","https://openalex.org/W2899471729","https://openalex.org/W3212083716","https://openalex.org/W4225405251","https://openalex.org/W4294753225","https://openalex.org/W4308641503","https://openalex.org/W4360836968","https://openalex.org/W4362514336","https://openalex.org/W4366999541","https://openalex.org/W4367277148","https://openalex.org/W4376122929","https://openalex.org/W4376653732","https://openalex.org/W4381712708","https://openalex.org/W4383215759","https://openalex.org/W4387356256","https://openalex.org/W4391912518","https://openalex.org/W4393434409","https://openalex.org/W4394769552","https://openalex.org/W4398229654","https://openalex.org/W4405301108","https://openalex.org/W4411225534"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"were":[4,69],"shown":[5],"to":[6,48],"struggle":[7],"with":[8],"long-term":[9,92],"planning,":[10],"which":[11,20,68],"may":[12],"be":[13],"caused":[14],"by":[15,73,98],"the":[16,23,44,57,74,79,89,109,129,133,137],"limited":[17],"way":[18],"in":[19],"they":[21],"explore":[22],"space":[24,41],"of":[25,59,118,123],"possible":[26],"solutions.":[27],"We":[28,94,104,126],"propose":[29],"an":[30,39],"architecture":[31,107],"where":[32],"a":[33],"Reinforcement":[34],"Learning":[35],"(RL)":[36],"Agent":[37,45],"guides":[38],"LLM's":[40,75],"exploration:":[42],"(1)":[43],"has":[46],"access":[47],"domain-specific":[49],"information,":[50],"and":[51,65,102,113,121,136,147],"can":[52,81],"therefore":[53],"make":[54],"decisions":[55],"about":[56],"quality":[58],"candidate":[60],"solutions":[61],"based":[62],"on":[63,83,108],"specific":[64],"relevant":[66],"metrics,":[67],"not":[70],"explicitly":[71],"considered":[72],"training":[76],"objective;":[77],"(2)":[78],"LLM":[80],"focus":[82],"generating":[84],"immediate":[85],"next":[86],"steps,":[87],"without":[88],"need":[90],"for":[91],"planning.":[93],"allow":[95],"non-linear":[96],"reasoning":[97,139],"exploring":[99],"alternative":[100],"paths":[101],"backtracking.":[103],"evaluate":[105],"this":[106],"program":[110],"equivalence":[111],"task,":[112,131],"compare":[114],"it":[115],"against":[116,145],"Chain":[117],"Thought":[119],"(CoT)":[120],"Tree":[122],"Thoughts":[124],"(ToT).":[125],"assess":[127],"both":[128],"downstream":[130],"denoting":[132],"binary":[134],"classification,":[135],"intermediate":[138],"steps.":[140],"Our":[141],"approach":[142],"compares":[143],"positively":[144],"CoT":[146],"ToT.":[148]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
