{"id":"https://openalex.org/W7129614796","doi":"https://doi.org/10.48550/arxiv.2602.14917","title":"BFS-PO: Best-First Search for Large Reasoning Models","display_name":"BFS-PO: Best-First Search for Large Reasoning Models","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129614796","doi":"https://doi.org/10.48550/arxiv.2602.14917"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.14917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.14917","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115426697","display_name":"Fiorenzo Parascandolo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Parascandolo, Fiorenzo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126257566","display_name":"Wenhui Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Wenhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024183744","display_name":"Enver Sangineto","orcid":"https://orcid.org/0000-0002-5187-4133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sangineto, Enver","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126233718","display_name":"Ruihua Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Ruihua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122647287","display_name":"Rita Cucchiara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cucchiara, Rita","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115426697"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.30869999527931213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.30869999527931213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.26589998602867126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.04270000010728836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backtracking","display_name":"Backtracking","score":0.5462999939918518},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4180999994277954},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.41780000925064087},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.40290001034736633},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.39879998564720154},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.3978999853134155},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.3917999863624573},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.36500000953674316}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6970999836921692},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6230999827384949},{"id":"https://openalex.org/C156884757","wikidata":"https://www.wikidata.org/wiki/Q798554","display_name":"Backtracking","level":2,"score":0.5462999939918518},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.41780000925064087},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.39879998564720154},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3978999853134155},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.3917999863624573},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37290000915527344},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2508000135421753},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.14917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.14917","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14917","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Reasoning":[1],"Models":[2],"(LRMs)":[3],"such":[4,56],"as":[5,42,57],"OpenAI":[6],"o1":[7],"and":[8,33,112,125],"DeepSeek-R1":[9],"have":[10],"shown":[11],"excellent":[12],"performance":[13],"in":[14],"reasoning":[15,19,107],"tasks":[16],"using":[17,72,86],"long":[18],"chains.":[20,108],"However,":[21],"this":[22,60,70],"has":[23],"also":[24],"led":[25],"to":[26,46,104],"a":[27,39,73,87],"significant":[28],"increase":[29,121],"of":[30,36],"computational":[31],"costs":[32],"the":[34,82,122],"generation":[35],"verbose":[37],"output,":[38],"phenomenon":[40],"known":[41],"overthinking.":[43],"The":[44],"tendency":[45],"overthinking":[47],"is":[48],"often":[49],"exacerbated":[50],"by":[51],"Reinforcement":[52],"Learning":[53],"(RL)":[54],"algorithms":[55],"GRPO/DAPO.":[58],"In":[59],"paper,":[61],"we":[62,115],"propose":[63],"BFS-PO,":[64],"an":[65],"RL":[66],"algorithm":[67],"which":[68],"alleviates":[69],"problem":[71],"Best-First":[74],"Search":[75],"exploration":[76],"strategy.":[77],"Specifically,":[78],"BFS-PO":[79,102,118],"looks":[80],"for":[81],"shortest":[83],"correct":[84],"answer":[85],"backtracking":[88],"mechanism":[89],"based":[90],"on":[91],"maximum":[92],"entropy":[93],"nodes.":[94],"By":[95],"generating":[96],"progressively":[97],"shorter":[98],"responses":[99],"during":[100],"training,":[101],"learns":[103],"produce":[105],"concise":[106],"Using":[109],"different":[110],"benchmarks":[111],"base":[113],"LRMs,":[114],"show":[116],"that":[117],"can":[119],"simultaneously":[120],"LRM":[123],"accuracy":[124],"shorten":[126],"its":[127],"answers.":[128]},"counts_by_year":[],"updated_date":"2026-02-18T06:25:47.457606","created_date":"2026-02-18T00:00:00"}
