{"id":"https://openalex.org/W7130615068","doi":"https://doi.org/10.48550/arxiv.2602.16507","title":"Small molecule retrieval from tandem mass spectrometry: what are we optimizing for?","display_name":"Small molecule retrieval from tandem mass spectrometry: what are we optimizing for?","publication_year":2026,"publication_date":"2026-02-18","ids":{"openalex":"https://openalex.org/W7130615068","doi":"https://doi.org/10.48550/arxiv.2602.16507"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.16507","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052622361","display_name":"Gaetan De Waele","orcid":"https://orcid.org/0000-0003-0367-9699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Waele, Gaetan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126442846","display_name":"Marek Wydmuch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wydmuch, Marek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Dembczy\u0144ski, Krzysztof","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dembczy\u0144ski, Krzysztof","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126394042","display_name":"Wojciech Kot\u0142owski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kot\u0142owski, Wojciech","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028945060","display_name":"Willem Waegeman","orcid":"https://orcid.org/0000-0002-5950-3003"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Waegeman, Willem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.548799991607666,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.548799991607666,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10683","display_name":"Mass Spectrometry Techniques and Applications","score":0.11429999768733978,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13192","display_name":"Forensic Fingerprint Detection Methods","score":0.11349999904632568,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fingerprint","display_name":"Fingerprint (computing)","score":0.6504999995231628},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5823000073432922},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5206000208854675},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.4702000021934509},{"id":"https://openalex.org/keywords/tandem","display_name":"Tandem","score":0.43290001153945923},{"id":"https://openalex.org/keywords/small-molecule","display_name":"Small molecule","score":0.40049999952316284}],"concepts":[{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.6504999995231628},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5871999859809875},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5823000073432922},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5206000208854675},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.508400022983551},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4404999911785126},{"id":"https://openalex.org/C2777814067","wikidata":"https://www.wikidata.org/wiki/Q1752317","display_name":"Tandem","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C161624437","wikidata":"https://www.wikidata.org/wiki/Q1988322","display_name":"Small molecule","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C31827203","wikidata":"https://www.wikidata.org/wiki/Q874296","display_name":"Tandem mass spectrometry","level":3,"score":0.3725999891757965},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3301999866962433},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C162356407","wikidata":"https://www.wikidata.org/wiki/Q180809","display_name":"Mass spectrometry","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3086000084877014},{"id":"https://openalex.org/C2779280203","wikidata":"https://www.wikidata.org/wiki/Q17121211","display_name":"Small data","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C103697762","wikidata":"https://www.wikidata.org/wiki/Q4112105","display_name":"Virtual screening","level":3,"score":0.2937999963760376},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.26649999618530273}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.16507","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.16507","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16507","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.16507","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"One":[0],"of":[1,9,116,148],"the":[2,6,19,22,113,145],"central":[3],"challenges":[4],"in":[5,59,70],"computational":[7],"analysis":[8,139],"liquid":[10],"chromatography-tandem":[11],"mass":[12,49],"spectrometry":[13],"(LC-MS/MS)":[14],"data":[15],"is":[16,30,52],"to":[17,55],"identify":[18],"compounds":[20],"underlying":[21],"output":[23],"spectra.":[24],"In":[25,83],"recent":[26],"years,":[27],"this":[28,84,141],"problem":[29],"increasingly":[31],"tackled":[32],"using":[33],"deep":[34],"learning":[35],"methods.":[36],"A":[37],"common":[38],"strategy":[39],"involves":[40],"predicting":[41],"a":[42,60,109],"molecular":[43,122],"fingerprint":[44,118,128,157],"vector":[45],"from":[46],"an":[47],"input":[48],"spectrum,":[50],"which":[51],"then":[53],"used":[54,89],"search":[56],"for":[57,101,125,153],"matches":[58],"chemical":[61],"compound":[62],"database.":[63],"While":[64],"various":[65],"loss":[66,90,154],"functions":[67],"are":[68],"employed":[69],"training":[71],"these":[72,102],"predictive":[73],"models,":[74],"their":[75],"impact":[76],"on":[77,144],"model":[78],"performance":[79],"remains":[80],"poorly":[81],"understood.":[82],"study,":[85],"we":[86],"investigate":[87],"commonly":[88],"functions,":[91],"deriving":[92],"novel":[93],"regret":[94],"bounds":[95],"that":[96],"characterize":[97],"when":[98],"Bayes-optimal":[99],"decisions":[100],"objectives":[103,115],"must":[104],"diverge.":[105],"Our":[106,137],"results":[107],"reveal":[108],"fundamental":[110],"trade-off":[111,142],"between":[112],"two":[114],"(1)":[117],"similarity":[119,146],"and":[120,134,156],"(2)":[121],"retrieval.":[123],"Optimizing":[124],"more":[126],"accurate":[127],"predictions":[129],"typically":[130],"worsens":[131],"retrieval":[132],"results,":[133],"vice":[135],"versa.":[136],"theoretical":[138],"shows":[140],"depends":[143],"structure":[147],"candidate":[149],"sets,":[150],"providing":[151],"guidance":[152],"function":[155],"selection.":[158]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-20T00:00:00"}
