{"id":"https://openalex.org/W7162128195","doi":"https://doi.org/10.48550/arxiv.2605.22672","title":"Is Capability a Liability? More Capable Language Models Make Worse Forecasts When It Matters Most","display_name":"Is Capability a Liability? More Capable Language Models Make Worse Forecasts When It Matters Most","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162128195","doi":"https://doi.org/10.48550/arxiv.2605.22672"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22672","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22672","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22672","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026428321","display_name":"Nick Merrill","orcid":"https://orcid.org/0000-0003-3669-1387"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merrill, Nick","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136804453","display_name":"Jaeho Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jaeho","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5046987433","display_name":"Ezra Karger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karger, Ezra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.09889999777078629,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.09889999777078629,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10410","display_name":"COVID-19 epidemiological studies","score":0.06120000034570694,"subfield":{"id":"https://openalex.org/subfields/2611","display_name":"Modeling and Simulation"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12011","display_name":"Insurance, Mortality, Demography, Risk Management","score":0.05739999935030937,"subfield":{"id":"https://openalex.org/subfields/3317","display_name":"Demography"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6018000245094299},{"id":"https://openalex.org/keywords/sign","display_name":"Sign (mathematics)","score":0.5936999917030334},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4814000129699707},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.47429999709129333},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.46140000224113464},{"id":"https://openalex.org/keywords/series","display_name":"Series (stratigraphy)","score":0.42399999499320984},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4235999882221222}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6018000245094299},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.5936999917030334},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.5619999766349792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5130000114440918},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.47429999709129333},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.46140000224113464},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.42399999499320984},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4235999882221222},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.4124000072479248},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.39259999990463257},{"id":"https://openalex.org/C89992363","wikidata":"https://www.wikidata.org/wiki/Q5961558","display_name":"Track (disk drive)","level":2,"score":0.36079999804496765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35249999165534973},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3294000029563904},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C163175372","wikidata":"https://www.wikidata.org/wiki/Q3339222","display_name":"Linear model","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27709999680519104},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2531999945640564},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22672","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22672","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22672","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22672","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7388297915458679,"id":"https://metadata.un.org/sdg/3","display_name":"Good health and well-being"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,172],"document":[1],"inverse":[2,127],"scaling":[3,128],"in":[4,25,51,63,136],"LLMs":[5],"on":[6,42,66,132,147,168],"forecasting":[7,52,138,176],"problems":[8],"whose":[9],"underlying":[10],"time":[11],"series":[12],"exhibit":[13],"superlinear":[14],"growth":[15],"and":[16,27,61,71,112],"tail":[17,99],"risk":[18],"of":[19,94,105,143,164,183],"regime":[20],"change,":[21],"a":[22,45,57],"structure":[23],"common":[24,135],"finance":[26],"epidemiology.":[28],"On":[29],"these":[30],"tasks,":[31],"more":[32,85],"capable":[33,86],"models":[34,87],"produce":[35],"worse":[36],"distributional":[37],"forecasts.":[38],"The":[39],"pattern":[40],"appears":[41],"ForecastBench-Sim":[43],"(FBSim),":[44],"contamination-free,":[46],"simulated-world":[47],"benchmark":[48],"we":[49],"release,":[50],"synthetic":[53],"SIR":[54],"epidemics":[55],"with":[56],"matched":[58],"linear":[59],"control,":[60],"replicates":[62],"real-world":[64],"datasets":[65],"COVID-19,":[67],"measles,":[68],"housing":[69],"markets,":[70],"hyperinflation.":[72],"A":[73,102],"per-quantile":[74],"decomposition":[75],"shows":[76,107],"the":[77,81,97,141,144,156,162,165,169],"failure":[78],"concentrates":[79],"at":[80,152],"upper":[82],"tail,":[83],"which":[84],"shift":[88],"upward":[89],"to":[90,116],"track":[91],"aggressive":[92],"extrapolations":[93],"growth,":[95],"while":[96],"lower":[98],"stays":[100],"put.":[101],"within-family":[103],"study":[104],"Llama-3.1":[106],"that":[108,174],"both":[109],"model":[110],"scale":[111],"post-training":[113],"independently":[114],"contribute":[115],"this":[117],"effect.":[118],"Domain":[119],"knowledge":[120],"does":[121,129],"not":[122,130],"reliably":[123],"rescue":[124],"calibration.":[125],"This":[126],"appear":[131],"single-threshold":[133],"metrics":[134],"LLM":[137,175],"benchmarks,":[139],"reversing":[140],"sign":[142,163],"capability--accuracy":[145,166],"relationship":[146,167],"identical":[148],"outputs.":[149,171],"Single-threshold":[150],"scoring":[151,160],"conventional":[153],"cutoffs":[154],"misses":[155],"upper-tail":[157],"cost;":[158],"tail-inclusive":[159],"reverses":[161],"same":[170],"recommend":[173],"evaluations":[177],"use":[178],"continuous":[179],"(and":[180],"unbounded)":[181],"measures":[182],"accuracy":[184],"alongside":[185],"bounded":[186],"binary":[187],"threshold":[188],"metrics.":[189]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
