{"id":"https://openalex.org/W4403346517","doi":"https://doi.org/10.48550/arxiv.2410.05563","title":"Rational Metareasoning for Large Language Models","display_name":"Rational Metareasoning for Large Language Models","publication_year":2024,"publication_date":"2024-10-07","ids":{"openalex":"https://openalex.org/W4403346517","doi":"https://doi.org/10.48550/arxiv.2410.05563"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.05563","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05563","pdf_url":"https://arxiv.org/pdf/2410.05563","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.05563","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109819563","display_name":"C. Nicol\u00f2 De Sabbata","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"De Sabbata, C. Nicol\u00f2","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047918106","display_name":"Theodore R. Sumers","orcid":"https://orcid.org/0000-0002-6128-0291"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sumers, Theodore R.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077079119","display_name":"Thomas L. Griffiths","orcid":"https://orcid.org/0000-0002-5138-7255"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AlKhamissi, Badr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bosselut, Antoine","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bosselut, Antoine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Griffiths, Thomas L.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Griffiths, Thomas L.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5109819563"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9668999910354614,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4042733907699585},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.403232216835022},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.18382012844085693}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4042733907699585},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.403232216835022},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.18382012844085693}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.05563","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05563","pdf_url":"https://arxiv.org/pdf/2410.05563","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.05563","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.05563","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.05563","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.05563","pdf_url":"https://arxiv.org/pdf/2410.05563","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403346517.pdf","grobid_xml":"https://content.openalex.org/works/W4403346517.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Being":[0],"prompted":[1],"to":[2,22,68,101,106],"engage":[3],"in":[4,30,63],"reasoning":[5,72],"has":[6],"emerged":[7],"as":[8,27],"a":[9,53,80],"core":[10],"technique":[11],"for":[12],"using":[13],"large":[14],"language":[15],"models":[16,59],"(LLMs),":[17],"deploying":[18],"additional":[19],"inference-time":[20],"compute":[21],"improve":[23],"task":[24,127],"performance.":[25],"However,":[26],"LLMs":[28,67],"increase":[29],"both":[31],"size":[32],"and":[33,110],"adoption,":[34],"inference":[35,116],"costs":[36,117],"are":[37],"correspondingly":[38],"becoming":[39],"increasingly":[40],"burdensome.":[41],"How,":[42],"then,":[43],"might":[44],"we":[45],"optimize":[46],"reasoning's":[47],"cost-performance":[48],"tradeoff?":[49],"This":[50],"work":[51],"introduces":[52],"novel":[54],"approach":[55],"based":[56],"on":[57],"computational":[58],"of":[60,87],"metareasoning":[61],"used":[62],"cognitive":[64],"science,":[65],"training":[66],"selectively":[69],"use":[70,94],"intermediate":[71],"steps":[73],"only":[74],"when":[75],"necessary.":[76],"We":[77],"first":[78],"develop":[79],"reward":[81,96],"function":[82,97],"that":[83],"incorporates":[84],"the":[85,103],"Value":[86],"Computation":[88],"by":[89],"penalizing":[90],"unnecessary":[91],"reasoning,":[92],"then":[93],"this":[95],"with":[98],"Expert":[99],"Iteration":[100],"train":[102],"LLM.":[104],"Compared":[105],"few-shot":[107],"chain-of-thought":[108],"prompting":[109],"STaR,":[111],"our":[112],"method":[113],"significantly":[114],"reduces":[115],"(20-37\\%":[118],"fewer":[119],"tokens":[120],"generated":[121],"across":[122,129],"three":[123],"models)":[124],"while":[125],"maintaining":[126],"performance":[128],"diverse":[130],"datasets.":[131]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
