{"id":"https://openalex.org/W7162093329","doi":"https://doi.org/10.48550/arxiv.2605.22300","title":"Cross-domain benchmarks reveal when coordinated AI agents improve scientific inference from partial evidence","display_name":"Cross-domain benchmarks reveal when coordinated AI agents improve scientific inference from partial evidence","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162093329","doi":"https://doi.org/10.48550/arxiv.2605.22300"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22300","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136777854","display_name":"Fiona Y. Wong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wong, Fiona Y.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5011504360","display_name":"Markus J. Buehler","orcid":"https://orcid.org/0000-0002-4173-9659"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Buehler, Markus J.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.744700014591217,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.744700014591217,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.024399999529123306,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.020500000566244125,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vetting","display_name":"Vetting","score":0.9128999710083008},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.597100019454956},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5803999900817871},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.47870001196861267},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4537999927997589},{"id":"https://openalex.org/keywords/scientific-discovery","display_name":"Scientific discovery","score":0.38440001010894775},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.3765000104904175}],"concepts":[{"id":"https://openalex.org/C2777230681","wikidata":"https://www.wikidata.org/wiki/Q7923820","display_name":"Vetting","level":2,"score":0.9128999710083008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6462000012397766},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.597100019454956},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5803999900817871},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.47870001196861267},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4537999927997589},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44749999046325684},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42570000886917114},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38609999418258667},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.367900013923645},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.3644999861717224},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.3402000069618225},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C2777705401","wikidata":"https://www.wikidata.org/wiki/Q6457570","display_name":"LEAPS","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27399998903274536},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.26350000500679016}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.8783240914344788,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scientific":[0],"evidence":[1],"often":[2],"spans":[3],"instruments,":[4],"databases,":[5],"and":[6,60,80,110,149,168],"disciplines,":[7],"so":[8],"no":[9],"single":[10],"source":[11],"records":[12],"the":[13,97,117,154,165,183],"full":[14],"phenomenon.":[15],"This":[16],"makes":[17],"it":[18],"difficult":[19],"to":[20,179],"determine":[21],"when":[22,182],"coordinated":[23],"AI":[24],"agents":[25],"add":[26],"value":[27,178],"over":[28,102],"simpler":[29],"scientific":[30,42],"workflows.":[31],"We":[32],"evaluate":[33],"this":[34,172],"question":[35],"with":[36,123],"a":[37,67,124],"cross-domain":[38],"benchmark":[39,175],"spanning":[40],"four":[41],"tasks:":[43],"mapping":[44],"molecular":[45,152],"structure":[46],"into":[47],"musical":[48],"representations,":[49],"detecting":[50],"historical":[51],"paradigm":[52],"shifts":[53],"in":[54,142],"science,":[55],"identifying":[56],"vector-borne":[57],"disease":[58],"emergence,":[59],"vetting":[61,112],"transiting-exoplanet":[62],"candidates.":[63],"Each":[64],"case":[65],"uses":[66],"frozen":[68],"evaluation":[69],"panel,":[70],"predefined":[71],"scoring":[72],"protocols,":[73],"explicit":[74,193],"baselines,":[75],"ablations":[76],"or":[77,187],"null":[78],"controls,":[79],"stated":[81],"limitations.":[82],"The":[83,174],"results":[84],"define":[85],"three":[86],"operating":[87],"regimes.":[88],"When":[89,137],"different":[90],"disciplines":[91],"each":[92],"capture":[93],"only":[94,181],"part":[95],"of":[96],"phenomenon,":[98],"cross-channel":[99],"composites":[100],"improve":[101,134],"single-channel":[103],"baselines:":[104],"climate-vector":[105],"emergence":[106],"reaches":[107,113],"AUROC":[108,114],"0.944":[109],"exoplanet":[111,118],"0.955.":[115],"However,":[116],"workflow":[119],"is":[120,156,190],"effectively":[121],"tied":[122],"strong":[125],"combined-summary":[126],"baseline,":[127],"showing":[128],"that":[129],"decomposition":[130],"does":[131],"not":[132],"always":[133],"top-line":[135],"performance.":[136],"one":[138],"signal":[139],"dominates,":[140],"as":[141],"paradigm-shift":[143],"detection,":[144],"coordination":[145,180],"mainly":[146],"improves":[147],"interpretation":[148],"traceability.":[150],"For":[151],"sonification,":[153],"gain":[155],"representational":[157],"rather":[158],"than":[159],"predictive.":[160],"ScienceClaw":[161],"x":[162],"Infinite":[163],"provides":[164],"auditable":[166],"artifact":[167],"provenance":[169],"layer":[170],"for":[171],"evaluation.":[173],"therefore":[176],"assigns":[177],"corresponding":[184],"performance,":[185],"provenance,":[186],"representation":[188],"claim":[189],"supported":[191],"by":[192],"comparators.":[194]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-23T00:00:00"}
