{"id":"https://openalex.org/W4412187441","doi":"https://doi.org/10.1186/s12859-025-06194-1","title":"An evaluation methodology for machine learning-based tandem mass spectra similarity prediction","display_name":"An evaluation methodology for machine learning-based tandem mass spectra similarity prediction","publication_year":2025,"publication_date":"2025-07-11","ids":{"openalex":"https://openalex.org/W4412187441","doi":"https://doi.org/10.1186/s12859-025-06194-1","pmid":"https://pubmed.ncbi.nlm.nih.gov/40646448"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-025-06194-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06194-1","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-025-06194-1","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-025-06194-1","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110340971","display_name":"Michael Strobel","orcid":"https://orcid.org/0009-0000-3829-0048"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael Strobel","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA"],"raw_orcid":"https://orcid.org/0009-0000-3829-0048","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048895378","display_name":"Alberto Gil-de-la-Fuente","orcid":"https://orcid.org/0000-0002-5951-1601"},"institutions":[{"id":"https://openalex.org/I118091203","display_name":"Universidad San Pablo CEU","ror":"https://ror.org/00tvate34","country_code":"ES","type":"education","lineage":["https://openalex.org/I118091203","https://openalex.org/I2801318690"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Alberto Gil-de-la-Fuente","raw_affiliation_strings":["Information Technologies Department, Escuela Polit\u00e9cnica Superior, Universidad San Pablo-CEU, CEU Universities, Urbanizaci\u00f3n Montepr\u00edncipe, Boadilla Del monte, 28668, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-5951-1601","affiliations":[{"raw_affiliation_string":"Information Technologies Department, Escuela Polit\u00e9cnica Superior, Universidad San Pablo-CEU, CEU Universities, Urbanizaci\u00f3n Montepr\u00edncipe, Boadilla Del monte, 28668, Madrid, Spain","institution_ids":["https://openalex.org/I118091203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013976445","display_name":"Mohammad Reza Zare Shahneh","orcid":"https://orcid.org/0000-0002-5760-3190"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Reza Zare Shahneh","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA"],"raw_orcid":"https://orcid.org/0000-0002-5760-3190","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010916495","display_name":"Yasin El Abiead","orcid":"https://orcid.org/0000-0003-4392-7706"},"institutions":[{"id":"https://openalex.org/I6750721","display_name":"University of Montana","ror":"https://ror.org/0078xmk34","country_code":"US","type":"education","lineage":["https://openalex.org/I6750721"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yasin El Abiead","raw_affiliation_strings":["Skaggs School of Pharmacy and Pharmaceutical Science, University of California San Diego, 9255 Pharmacy Ln, San Diego, CA, 92093, USA"],"raw_orcid":"https://orcid.org/0000-0003-4392-7706","affiliations":[{"raw_affiliation_string":"Skaggs School of Pharmacy and Pharmaceutical Science, University of California San Diego, 9255 Pharmacy Ln, San Diego, CA, 92093, USA","institution_ids":["https://openalex.org/I6750721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067584589","display_name":"Roman Bushuiev","orcid":"https://orcid.org/0000-0003-1769-1509"},"institutions":[{"id":"https://openalex.org/I4210145889","display_name":"Czech Academy of Sciences, Institute of Organic Chemistry and Biochemistry","ror":"https://ror.org/04nfjn472","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210145889"]},{"id":"https://openalex.org/I4210152232","display_name":"Institute of Informatics of the Slovak Academy of Sciences","ror":"https://ror.org/04jgqpc26","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210152232"]}],"countries":["CZ","SK"],"is_corresponding":false,"raw_author_name":"Roman Bushuiev","raw_affiliation_strings":["Czech Institute of Informatics, Robotics and Cybernetics, Jugosl\u00e1vsk\u00fdch partyz\u00e1n\u016f 1580/3, Prague, 16000, Czech Republic","Institute of Organic Chemistry and Biochemistry, Czech Academy of Sciences, Flemingovo n\u00e1m. 542/2, Prague, 16000, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0003-1769-1509","affiliations":[{"raw_affiliation_string":"Czech Institute of Informatics, Robotics and Cybernetics, Jugosl\u00e1vsk\u00fdch partyz\u00e1n\u016f 1580/3, Prague, 16000, Czech Republic","institution_ids":["https://openalex.org/I4210152232"]},{"raw_affiliation_string":"Institute of Organic Chemistry and Biochemistry, Czech Academy of Sciences, Flemingovo n\u00e1m. 542/2, Prague, 16000, Czech Republic","institution_ids":["https://openalex.org/I4210145889"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093055707","display_name":"Anton Bushuiev","orcid":"https://orcid.org/0009-0007-4783-6584"},"institutions":[{"id":"https://openalex.org/I4210152232","display_name":"Institute of Informatics of the Slovak Academy of Sciences","ror":"https://ror.org/04jgqpc26","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210152232"]}],"countries":["SK"],"is_corresponding":false,"raw_author_name":"Anton Bushuiev","raw_affiliation_strings":["Czech Institute of Informatics, Robotics and Cybernetics, Jugosl\u00e1vsk\u00fdch partyz\u00e1n\u016f 1580/3, Prague, 16000, Czech Republic"],"raw_orcid":"https://orcid.org/0009-0007-4783-6584","affiliations":[{"raw_affiliation_string":"Czech Institute of Informatics, Robotics and Cybernetics, Jugosl\u00e1vsk\u00fdch partyz\u00e1n\u016f 1580/3, Prague, 16000, Czech Republic","institution_ids":["https://openalex.org/I4210152232"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088461185","display_name":"Tom\u00e1\u0161 Pluskal","orcid":"https://orcid.org/0000-0002-6940-3006"},"institutions":[{"id":"https://openalex.org/I4210145889","display_name":"Czech Academy of Sciences, Institute of Organic Chemistry and Biochemistry","ror":"https://ror.org/04nfjn472","country_code":"CZ","type":"facility","lineage":["https://openalex.org/I202391551","https://openalex.org/I4210145889"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Tom\u00e1\u0161 Pluskal","raw_affiliation_strings":["Institute of Organic Chemistry and Biochemistry, Czech Academy of Sciences, Flemingovo n\u00e1m. 542/2, Prague, 16000, Czech Republic"],"raw_orcid":"https://orcid.org/0000-0002-6940-3006","affiliations":[{"raw_affiliation_string":"Institute of Organic Chemistry and Biochemistry, Czech Academy of Sciences, Flemingovo n\u00e1m. 542/2, Prague, 16000, Czech Republic","institution_ids":["https://openalex.org/I4210145889"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058074901","display_name":"Mingxun Wang","orcid":"https://orcid.org/0000-0001-7647-6097"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingxun Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA. mingxun.wang@cs.ucr.edu"],"raw_orcid":"https://orcid.org/0000-0001-7647-6097","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California Riverside, 900 University Ave., Riverside, CA, 92521, USA. mingxun.wang@cs.ucr.edu","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5110340971"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":6.5177,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.96750422,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"26","issue":"1","first_page":"174","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.7802000045776367,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.7802000045776367,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.10520000010728836,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.029899999499320984,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7900583744049072},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5996513366699219},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5594407320022583},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.5536921620368958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5525392293930054},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5184242725372314},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.500765323638916},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4775075316429138},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.46430259943008423},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4403077960014343},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.43759840726852417},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.42628008127212524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3200100064277649},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10671466588973999},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10012790560722351}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7900583744049072},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5996513366699219},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5594407320022583},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.5536921620368958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5525392293930054},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5184242725372314},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.500765323638916},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4775075316429138},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.46430259943008423},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4403077960014343},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.43759840726852417},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.42628008127212524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3200100064277649},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10671466588973999},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10012790560722351},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D053719","descriptor_name":"Tandem Mass Spectrometry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D053719","descriptor_name":"Tandem Mass Spectrometry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D053719","descriptor_name":"Tandem Mass Spectrometry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true}],"locations_count":6,"locations":[{"id":"doi:10.1186/s12859-025-06194-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06194-1","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-025-06194-1","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:40646448","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40646448","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:escholarship.org:ark:/13030/qt5kw514nz","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/5kw514nz","pdf_url":"https://escholarship.org/content/qt5kw514nz/qt5kw514nz.pdf","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, vol 26, iss 1","raw_type":"article"},{"id":"pmh:ark:/13030/qt5kw514nz","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"pmh:oai:doaj.org/article:aa840295040448f3b42d4595e15fda2e","is_oa":true,"landing_page_url":"https://doaj.org/article/aa840295040448f3b42d4595e15fda2e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 26, Iss 1, Pp 1-17 (2025)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:12247221","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12247221","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s12859-025-06194-1","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06194-1","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/s12859-025-06194-1","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G1286236842","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1723136494","display_name":null,"funder_award_id":"1R03OD034493-01","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2314896242","display_name":null,"funder_award_id":"5U24DK133658-02","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3083819904","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3944918260","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4501827968","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4565140552","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5602503670","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320316170","funder_display_name":"Joint Genome Institute"},{"id":"https://openalex.org/G6348972864","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7205239661","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320316170","funder_display_name":"Joint Genome Institute"},{"id":"https://openalex.org/G7538560101","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320316170","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412187441.pdf","grobid_xml":"https://content.openalex.org/works/W4412187441.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W2028421938","https://openalex.org/W2050226017","https://openalex.org/W2059327215","https://openalex.org/W2068238590","https://openalex.org/W2144803530","https://openalex.org/W2150884987","https://openalex.org/W2151697120","https://openalex.org/W2412446857","https://openalex.org/W2504691963","https://openalex.org/W2605897695","https://openalex.org/W2909063104","https://openalex.org/W2990138404","https://openalex.org/W3080789666","https://openalex.org/W3081878206","https://openalex.org/W3129424480","https://openalex.org/W3171010532","https://openalex.org/W3209680996","https://openalex.org/W3216027541","https://openalex.org/W4287123779","https://openalex.org/W4290831374","https://openalex.org/W4376270841","https://openalex.org/W4387782510","https://openalex.org/W4399929809","https://openalex.org/W4401091692","https://openalex.org/W4401506503","https://openalex.org/W4402082179","https://openalex.org/W4402569794","https://openalex.org/W4404346391","https://openalex.org/W4410621552"],"related_works":["https://openalex.org/W2118717649","https://openalex.org/W2413243053","https://openalex.org/W410723623","https://openalex.org/W2015341305","https://openalex.org/W2035068594","https://openalex.org/W4225593417","https://openalex.org/W2573498121","https://openalex.org/W3022298670","https://openalex.org/W3160494304","https://openalex.org/W2388888344"],"abstract_inverted_index":{"BACKGROUND:":[0],"Untargeted":[1],"tandem":[2,26],"mass":[3,27],"spectrometry":[4,28],"serves":[5],"as":[6,68,266],"a":[7,44,69,95,100,141,146,172,237,295],"scalable":[8],"solution":[9],"for":[10,22,151,253,271,294],"the":[11,18,24,51,81,87,138,152,216,219,225,267,287],"organization":[12],"of":[13,17,47,53,102,140,154,161,218,241,269,290,298],"small":[14],"molecules.":[15],"One":[16],"most":[19],"prevalent":[20],"techniques":[21],"analyzing":[23],"acquired":[25],"data":[29,119,125,248],"(MS/MS)":[30],"-":[31,35],"called":[32],"molecular":[33],"networking":[34],"organizes":[36],"and":[37,107,112,123,166,174,184,214,250,279],"visualizes":[38],"putatively":[39],"structurally":[40],"related":[41],"compounds.":[42],"However,":[43,86],"key":[45],"bottleneck":[46],"this":[48,134,262],"approach":[49],"is":[50,99,258],"comparison":[52,88,281],"MS/MS":[54,77,109,277],"spectra":[55],"used":[56],"to":[57,72,104,128,188],"identify":[58],"nearby":[59],"structural":[60,74,162],"neighbors.":[61],"Machine":[62],"learning":[63,156,274],"(ML)":[64],"approaches":[65,275],"have":[66],"emerged":[67],"promising":[70],"technique":[71],"predict":[73],"similarity":[75,110,163,278],"from":[76],"that":[78,117,149,177,200,227,261,286],"may":[79],"surpass":[80],"current":[82],"state-of-the-art":[83],"algorithmic":[84],"methods.":[85],"between":[89,121,164,282],"these":[90],"different":[91],"ML":[92],"methods":[93,116,199],"remains":[94],"challenge":[96],"because":[97],"there":[98,113],"lack":[101],"standardization":[103],"benchmark,":[105],"evaluate,":[106],"compare":[108],"methods,":[111],"are":[114],"no":[115],"address":[118],"leakage":[120],"training":[122,165,173,198],"test":[124,167],"in":[126,231,276],"order":[127],"analyze":[129],"model":[130],"generalizability.":[131],"RESULT:":[132],"In":[133],"work,":[135],"we":[136,235],"present":[137],"creation":[139],"new":[142],"evaluation":[143,153,175,291],"methodology":[144],"using":[145],"train/test":[147],"split":[148],"allows":[150,293],"machine":[155,273],"models":[157],"at":[158],"varying":[159],"degrees":[160],"sets.":[168],"We":[169,192,222,284],"also":[170],"introduce":[171],"framework":[176],"measures":[178],"prediction":[179,232],"accuracy":[180],"on":[181],"domain-inspired":[182],"annotation":[183],"retrieval":[185],"metrics":[186,292],"designed":[187],"mirror":[189],"real-world":[190],"applications.":[191],"further":[193],"show":[194],"how":[195],"two":[196],"alternative":[197],"leverage":[201],"MS":[202],"specific":[203],"insights":[204],"(e.g.,":[205],"similar":[206],"instrumentation,":[207],"collision":[208,228],"energy,":[209],"adduct)":[210],"affect":[211],"method":[212],"performance":[213],"demonstrate":[215],"orthogonality":[217],"proposed":[220],"metrics.":[221],"especially":[223],"highlight":[224],"role":[226],"energy":[229],"plays":[230],"errors.":[233],"Finally,":[234],"release":[236],"continually":[238],"updated":[239],"version":[240],"our":[242,247,259],"dataset":[243],"online":[244],"along":[245],"with":[246],"cleaning":[249],"splitting":[251],"pipelines":[252],"community":[254],"use.":[255],"CONCLUSION:":[256],"It":[257],"hope":[260],"benchmark":[263],"will":[264],"serve":[265],"basis":[268],"development":[270],"future":[272],"facilitate":[280],"models.":[283],"anticipate":[285],"introduced":[288],"set":[289],"better":[296],"reflection":[297],"practical":[299],"performance.":[300]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2}],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2025-10-10T00:00:00"}
