{"id":"https://openalex.org/W7135040567","doi":"https://doi.org/10.48550/arxiv.2603.10950","title":"When should we trust the annotation? Selective prediction for molecular structure retrieval from mass spectra","display_name":"When should we trust the annotation? Selective prediction for molecular structure retrieval from mass spectra","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135040567","doi":"https://doi.org/10.48550/arxiv.2603.10950"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10950","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128833541","display_name":"Mira J\u00fcrgens","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J\u00fcrgens, Mira","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052622361","display_name":"Gaetan De Waele","orcid":"https://orcid.org/0000-0003-0367-9699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Waele, Gaetan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001030142","display_name":"Morteza Rakhshaninejad","orcid":"https://orcid.org/0000-0001-5250-9165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rakhshaninejad, Morteza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028945060","display_name":"Willem Waegeman","orcid":"https://orcid.org/0000-0002-5950-3003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Waegeman, Willem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.45559999346733093,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.45559999346733093,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.21610000729560852,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.08550000190734863,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/uncertainty-quantification","display_name":"Uncertainty quantification","score":0.5810999870300293},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.553600013256073},{"id":"https://openalex.org/keywords/uncertainty-analysis","display_name":"Uncertainty analysis","score":0.45739999413490295},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.4410000145435333},{"id":"https://openalex.org/keywords/fingerprint","display_name":"Fingerprint (computing)","score":0.4171999990940094},{"id":"https://openalex.org/keywords/measurement-uncertainty","display_name":"Measurement uncertainty","score":0.4052000045776367},{"id":"https://openalex.org/keywords/propagation-of-uncertainty","display_name":"Propagation of uncertainty","score":0.38089999556541443}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5859000086784363},{"id":"https://openalex.org/C32230216","wikidata":"https://www.wikidata.org/wiki/Q7882499","display_name":"Uncertainty quantification","level":2,"score":0.5810999870300293},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.553600013256073},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5001999735832214},{"id":"https://openalex.org/C177803969","wikidata":"https://www.wikidata.org/wiki/Q29205","display_name":"Uncertainty analysis","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.4410000145435333},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.424699991941452},{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C137209882","wikidata":"https://www.wikidata.org/wiki/Q1403517","display_name":"Measurement uncertainty","level":2,"score":0.4052000045776367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3991999924182892},{"id":"https://openalex.org/C123614077","wikidata":"https://www.wikidata.org/wiki/Q1364905","display_name":"Propagation of uncertainty","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C176147448","wikidata":"https://www.wikidata.org/wiki/Q1889114","display_name":"Sensitivity analysis","level":3,"score":0.32019999623298645},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C94361409","wikidata":"https://www.wikidata.org/wiki/Q7882500","display_name":"Uncertainty reduction theory","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C193244246","wikidata":"https://www.wikidata.org/wiki/Q5432696","display_name":"False discovery rate","level":3,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Machine":[0],"learning":[1],"methods":[2],"for":[3,56,151],"identifying":[4],"molecular":[5,57,98],"structures":[6],"from":[7,60,67,120],"tandem":[8],"mass":[9],"spectra":[10],"(MS/MS)":[11],"have":[12,36],"advanced":[13],"rapidly,":[14],"yet":[15],"current":[16],"approaches":[17],"still":[18],"exhibit":[19],"significant":[20],"error":[21,186],"rates.":[22],"In":[23],"high-stakes":[24],"applications":[25],"such":[26],"as":[27,123,125],"clinical":[28],"metabolomics":[29],"and":[30,83,101,116,159,188],"environmental":[31],"screening,":[32],"incorrect":[33],"annotations":[34,193],"can":[35,47,182],"serious":[37],"consequences,":[38],"making":[39],"it":[40],"essential":[41],"to":[42,65],"determine":[43],"when":[44,69],"a":[45,52,184,190],"prediction":[46,54],"be":[48],"trusted.":[49],"We":[50,74,107,170],"introduce":[51],"selective":[53],"framework":[55,82],"structure":[58],"retrieval":[59,152],"MS/MS":[61],"spectra,":[62],"enabling":[63],"models":[64],"abstain":[66],"predictions":[68],"uncertainty":[70,86,95,103,118,146,162],"is":[71],"too":[72],"high.":[73],"formulate":[75],"the":[76,79,129,137],"problem":[77],"within":[78],"risk-coverage":[80,165],"tradeoff":[81],"comprehensively":[84],"evaluate":[85],"quantification":[87],"strategies":[88],"at":[89],"two":[90],"levels":[91],"of":[92,192],"granularity:":[93],"fingerprint-level":[94,145],"over":[96,104],"predicted":[97],"fingerprint":[99],"bits,":[100],"retrieval-level":[102,160],"candidate":[105],"rankings.":[106],"compare":[108],"scoring":[109],"functions":[110],"including":[111],"first-order":[112,156],"confidence":[113,157],"measures,":[114],"aleatoric":[115,161],"epistemic":[117],"estimates":[119],"second-order":[121],"distributions,":[122],"well":[124],"distance-based":[126],"measures":[127,158],"in":[128],"latent":[130],"space.":[131],"All":[132],"experiments":[133],"are":[134,148],"conducted":[135],"on":[136],"MassSpecGym":[138],"benchmark.":[139],"Our":[140],"analysis":[141],"reveals":[142],"that":[143,172,195],"while":[144],"scores":[147],"poor":[149],"proxies":[150],"success,":[153],"computationally":[154],"inexpensive":[155],"achieve":[163],"strong":[164],"tradeoffs":[166],"across":[167],"evaluation":[168],"settings.":[169],"demonstrate":[171],"by":[173],"applying":[174],"distribution-free":[175],"risk":[176],"control":[177],"via":[178],"generalization":[179],"bounds,":[180],"practitioners":[181],"specify":[183],"tolerable":[185],"rate":[187],"obtain":[189],"subset":[191],"satisfying":[194],"constraint":[196],"with":[197],"high":[198],"probability.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-13T00:00:00"}
