{"id":"https://openalex.org/W7151463498","doi":"https://doi.org/10.48550/arxiv.2604.04220","title":"TimeSeek: Temporal Reliability of Agentic Forecasters","display_name":"TimeSeek: Temporal Reliability of Agentic Forecasters","publication_year":2026,"publication_date":"2026-04-05","ids":{"openalex":"https://openalex.org/W7151463498","doi":"https://doi.org/10.48550/arxiv.2604.04220"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04220","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04220","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04220","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036840249","display_name":"Hamza Mostafa","orcid":"https://orcid.org/0000-0003-4319-9542"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mostafa, Hamza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031507145","display_name":"Om Shastri","orcid":"https://orcid.org/0000-0002-9136-631X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shastri, Om","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5072563536","display_name":"Dennis Lee","orcid":"https://orcid.org/0000-0001-6900-5158"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Dennis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5036840249"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.4690000116825104,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11918","display_name":"Forecasting Techniques and Applications","score":0.4690000116825104,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11674","display_name":"Sports Analytics and Performance","score":0.2029000073671341,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.0786999985575676,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/snapshot","display_name":"Snapshot (computer storage)","score":0.6273999810218811},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5565000176429749},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5016999840736389},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.3939000070095062},{"id":"https://openalex.org/keywords/brier-score","display_name":"Brier score","score":0.33480000495910645},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.29670000076293945}],"concepts":[{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.6273999810218811},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6087999939918518},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5565000176429749},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.525600016117096},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5016999840736389},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4059000015258789},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.37779998779296875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3747999966144562},{"id":"https://openalex.org/C35405484","wikidata":"https://www.wikidata.org/wiki/Q4967066","display_name":"Brier score","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2881999909877777},{"id":"https://openalex.org/C2779190172","wikidata":"https://www.wikidata.org/wiki/Q4913888","display_name":"Binary data","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C27564746","wikidata":"https://www.wikidata.org/wiki/Q913709","display_name":"Market research","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.265500009059906}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04220","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04220","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04220","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04220","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,20],"introduce":[1],"TimeSeek,":[2],"a":[3,16,50,118,123],"benchmark":[4],"for":[5,40,75],"studying":[6],"how":[7],"the":[8,104],"reliability":[9],"of":[10,83],"agentic":[11],"LLM":[12],"forecasters":[13],"changes":[14],"over":[15],"prediction":[17],"market's":[18,51],"lifecycle.":[19],"evaluate":[21],"10":[22],"frontier":[23],"models":[24],"on":[25,54,64,91],"150":[26],"CFTC-regulated":[27],"Kalshi":[28],"binary":[29],"markets":[30],"at":[31],"five":[32],"temporal":[33],"checkpoints,":[34],"with":[35],"and":[36,53,63,113],"without":[37,102],"web":[38],"search,":[39],"15,000":[41],"forecasts":[42],"total.":[43],"Models":[44],"are":[45],"most":[46],"competitive":[47,60],"early":[48],"in":[49,81],"life":[52],"high-uncertainty":[55],"markets,":[56],"but":[57,93],"much":[58],"less":[59],"near":[61],"resolution":[62],"strong-consensus":[65],"markets.":[66],"Web":[67],"search":[68],"improves":[69],"pooled":[70],"Brier":[71],"Skill":[72],"Score":[73],"(BSS)":[74],"every":[76],"model":[77],"overall,":[78],"yet":[79],"hurts":[80],"12%":[82],"model-checkpoint":[84],"pairs,":[85],"indicating":[86],"that":[87],"retrieval":[88],"is":[89],"helpful":[90],"average":[92],"not":[94],"uniformly":[95],"so.":[96],"Simple":[97],"two-model":[98],"ensembles":[99],"reduce":[100],"error":[101],"surpassing":[103],"market":[105,120],"overall.":[106],"These":[107],"descriptive":[108],"results":[109],"motivate":[110],"time-aware":[111],"evaluation":[112],"selective-deference":[114],"policies":[115],"rather":[116],"than":[117],"single":[119],"snapshot":[121],"or":[122],"uniform":[124],"tool-use":[125],"setting.":[126]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-08T00:00:00"}
