{"id":"https://openalex.org/W7114784229","doi":"https://doi.org/10.48550/arxiv.2512.08130","title":"Biothreat Benchmark Generation Framework for Evaluating Frontier AI Models I: The Task-Query Architecture","display_name":"Biothreat Benchmark Generation Framework for Evaluating Frontier AI Models I: The Task-Query Architecture","publication_year":2025,"publication_date":"2025-12-09","ids":{"openalex":"https://openalex.org/W7114784229","doi":"https://doi.org/10.48550/arxiv.2512.08130"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.08130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.08130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.08130","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ackerman, Gary","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ackerman, Gary","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Behlendorf, Brandon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Behlendorf, Brandon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kallenborn, Zachary","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kallenborn, Zachary","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Almakki, Sheriff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Almakki, Sheriff","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Clifford, Doug","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Clifford, Doug","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"LaTourette, Jenna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"LaTourette, Jenna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Peterson, Hayley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peterson, Hayley","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sheinbaum, Noah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheinbaum, Noah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shoemaker, Olivia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shoemaker, Olivia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wetzel, Anna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wetzel, Anna","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11515","display_name":"Bacillus and Francisella bacterial research","score":0.2084999978542328,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11515","display_name":"Bacillus and Francisella bacterial research","score":0.2084999978542328,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.05249999836087227,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12391","display_name":"Artificial Immune Systems Applications","score":0.04320000112056732,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/biosecurity","display_name":"Biosecurity","score":0.7531999945640564},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7268999814987183},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6258000135421753},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.45590001344680786},{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.4101000130176544},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.4065000116825104},{"id":"https://openalex.org/keywords/risk-management","display_name":"Risk management","score":0.4034999907016754},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3968000113964081}],"concepts":[{"id":"https://openalex.org/C2781368420","wikidata":"https://www.wikidata.org/wiki/Q803874","display_name":"Biosecurity","level":2,"score":0.7531999945640564},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7268999814987183},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6258000135421753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5751000046730042},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.5698000192642212},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.45590001344680786},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.4101000130176544},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.4065000116825104},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.4034999907016754},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3968000113964081},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3896999955177307},{"id":"https://openalex.org/C110455231","wikidata":"https://www.wikidata.org/wiki/Q897788","display_name":"Compartmentalization (fire protection)","level":3,"score":0.37790000438690186},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3693000078201294},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.36340001225471497},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3012999892234802},{"id":"https://openalex.org/C59411770","wikidata":"https://www.wikidata.org/wiki/Q8242560","display_name":"Biological warfare","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.2912999987602234},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.26980000734329224},{"id":"https://openalex.org/C13606891","wikidata":"https://www.wikidata.org/wiki/Q2623243","display_name":"Conceptual model","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C2777363581","wikidata":"https://www.wikidata.org/wiki/Q15098235","display_name":"Harm","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C9982957","wikidata":"https://www.wikidata.org/wiki/Q864360","display_name":"Biological hazard","level":2,"score":0.25760000944137573}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.08130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.08130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.08130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.08130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Both":[0],"model":[1,42,76,199,212],"developers":[2,77],"and":[3,8,78,82,88,94,120,156,250,256],"policymakers":[4],"seek":[5],"to":[6,24,29,74,124,138,195,224],"quantify":[7],"mitigate":[9],"the":[10,39,47,58,84,104,132,162,165,173,184,205,215,219,245],"risk":[11,49,86,127],"of":[12,35,41,61,92,103,152,167,175,241,248,263],"rapidly-evolving":[13],"frontier":[14],"artificial":[15],"intelligence":[16],"(AI)":[17],"models,":[18,97],"especially":[19],"large":[20],"language":[21],"models":[22],"(LLMs),":[23],"facilitate":[25],"bioterrorism":[26],"or":[27],"access":[28],"biological":[30,141,233,254,264],"weapons.":[31],"An":[32],"important":[33],"element":[34],"such":[36],"efforts":[37,194],"is":[38,72,134,146],"development":[40,166,174],"benchmarks":[43,207],"that":[44,107],"can":[45,208],"assess":[46,83],"biosecurity":[48,85],"posed":[50],"by":[51],"a":[52,62,130,149,226,260],"particular":[53],"model.":[54],"This":[55,170],"paper":[56,171],"describes":[57],"first":[59,135],"component":[60],"novel":[63],"Biothreat":[64,186,221],"Benchmark":[65],"Generation":[66],"(BBG)":[67],"Framework.":[68],"The":[69,144],"BBG":[70,133,145,216],"approach":[71],"designed":[73],"help":[75],"evaluators":[79],"reliably":[80],"measure":[81],"uplift":[87],"general":[89],"harm":[90],"potential":[91],"existing":[93],"future":[95,189],"AI":[96],"while":[98,188],"accounting":[99],"for":[100,164,211,230,253,259],"key":[101],"aspects":[102],"threat":[105],"itself":[106],"are":[108],"often":[109],"overlooked":[110],"in":[111],"other":[112],"benchmarking":[113],"efforts,":[114],"including":[115,218],"different":[116],"actor":[117],"capability":[118],"levels,":[119],"operational":[121,251],"(in":[122],"addition":[123],"purely":[125],"technical)":[126],"factors.":[128],"As":[129],"pilot,":[131],"being":[136],"developed":[137],"address":[139],"bacterial":[140,232],"threats":[142],"only.":[143],"built":[147],"upon":[148],"hierarchical":[150],"structure":[151,229],"biothreat":[153,177],"categories,":[154],"elements":[155],"tasks,":[157],"which":[158,180,243,257],"then":[159],"serves":[160],"as":[161,201,203],"basis":[163],"task-aligned":[168],"queries.":[169],"outlines":[172],"this":[176],"task-query":[178],"architecture,":[179],"we":[181],"have":[182],"named":[183],"Bacterial":[185,220],"Schema,":[187,222],"papers":[190],"will":[191],"describe":[192],"follow-on":[193],"turn":[196],"queries":[197],"into":[198],"prompts,":[200],"well":[202],"how":[204],"resulting":[206],"be":[209],"implemented":[210],"evaluation.":[213],"Overall,":[214],"Framework,":[217],"seeks":[223],"offer":[225],"robust,":[227],"re-usable":[228],"evaluating":[231],"risks":[234],"arising":[235],"from":[236],"LLMs":[237],"across":[238],"multiple":[239],"levels":[240],"aggregation,":[242],"captures":[244],"full":[246],"scope":[247],"technical":[249],"requirements":[252],"adversaries,":[255],"accounts":[258],"wide":[261],"spectrum":[262],"adversary":[265],"capabilities.":[266]},"counts_by_year":[],"updated_date":"2025-12-11T23:13:37.075516","created_date":"2025-12-11T00:00:00"}
