{"id":"https://openalex.org/W7106264172","doi":"https://doi.org/10.48550/arxiv.2511.16315","title":"BioBench: A Blueprint to Move Beyond ImageNet for Scientific ML Benchmarks","display_name":"BioBench: A Blueprint to Move Beyond ImageNet for Scientific ML Benchmarks","publication_year":2025,"publication_date":"2025-11-20","ids":{"openalex":"https://openalex.org/W7106264172","doi":"https://doi.org/10.48550/arxiv.2511.16315"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.16315","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.16315","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.16315","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Stevens, Samuel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Stevens, Samuel","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.6980999708175659,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.6980999708175659,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.06620000302791595,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.03400000184774399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blueprint","display_name":"Blueprint","score":0.6504999995231628},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.6492999792098999},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4325999915599823},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4226999878883362},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.3596000075340271},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.34689998626708984},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.3467999994754791},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.34360000491142273}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6671000123023987},{"id":"https://openalex.org/C155911762","wikidata":"https://www.wikidata.org/wiki/Q422321","display_name":"Blueprint","level":2,"score":0.6504999995231628},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.6492999792098999},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5666999816894531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4772999882698059},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4562999904155731},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4325999915599823},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4226999878883362},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.3596000075340271},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34689998626708984},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3467999994754791},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.3215000033378601},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.3206999897956848},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3127000033855438},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C77660652","wikidata":"https://www.wikidata.org/wiki/Q150971","display_name":"Computer graphics","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2615000009536743},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.16315","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.16315","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.16315","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.16315","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"ImageNet-1K":[0],"linear-probe":[1],"transfer":[2],"accuracy":[3,29],"remains":[4],"the":[5],"default":[6],"proxy":[7],"for":[8,106,124,133],"visual":[9],"representation":[10],"quality,":[11],"yet":[12],"it":[13],"no":[14],"longer":[15],"predicts":[16],"performance":[17],"on":[18,35,116],"scientific":[19],"imagery.":[20],"Across":[21],"46":[22],"modern":[23],"vision":[24,52,126],"model":[25],"checkpoints,":[26],"ImageNet":[27,57],"top-1":[28],"explains":[30],"only":[31],"34%":[32],"of":[33,41],"variance":[34],"ecology":[36,51,128],"tasks":[37],"and":[38,69,79,99,108,129,142,148],"mis-ranks":[39],"30%":[40],"models":[42,111],"above":[43],"75%":[44],"accuracy.":[45],"We":[46],"present":[47],"BioBench,":[48],"an":[49,117],"open":[50],"benchmark":[53],"that":[54],"captures":[55],"what":[56],"misses.":[58],"BioBench":[59,120],"unifies":[60],"9":[61],"publicly":[62],"released,":[63],"application-driven":[64],"tasks,":[65],"4":[66],"taxonomic":[67],"kingdoms,":[68],"6":[70,114],"acquisition":[71],"modalities":[72],"(drone":[73],"RGB,":[74],"web":[75],"video,":[76],"micrographs,":[77],"in-situ":[78],"specimen":[80],"photos,":[81],"camera-trap":[82],"frames),":[83],"totaling":[84],"3.1M":[85],"images.":[86],"A":[87],"single":[88],"Python":[89],"API":[90],"downloads":[91],"data,":[92],"fits":[93],"lightweight":[94],"classifiers":[95],"to":[96],"frozen":[97],"backbones,":[98],"reports":[100],"class-balanced":[101],"macro-F1":[102],"(plus":[103],"domain":[104],"metrics":[105],"FishNet":[107],"FungiCLEF);":[109],"ViT-L":[110],"evaluate":[112],"in":[113,127,138],"hours":[115],"A6000":[118],"GPU.":[119],"provides":[121],"new":[122],"signal":[123],"computer":[125],"a":[130],"template":[131],"recipe":[132],"building":[134],"reliable":[135],"AI-for-science":[136],"benchmarks":[137],"any":[139],"domain.":[140],"Code":[141],"predictions":[143],"are":[144],"available":[145],"at":[146,150],"https://github.com/samuelstevens/biobench":[147],"results":[149],"https://samuelstevens.me/biobench.":[151]},"counts_by_year":[],"updated_date":"2025-11-23T05:13:22.807545","created_date":"2025-11-23T00:00:00"}
