{"id":"https://openalex.org/W7118876816","doi":"https://doi.org/10.48550/arxiv.2601.00941","title":"Comparative Analysis of Formula and Structure Prediction from Tandem Mass Spectra","display_name":"Comparative Analysis of Formula and Structure Prediction from Tandem Mass Spectra","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7118876816","doi":"https://doi.org/10.48550/arxiv.2601.00941"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00941","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122032743","display_name":"Xujun Che","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Xujun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122053914","display_name":"Xiuxia Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xiuxia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100730268","display_name":"Depeng Xu","orcid":"https://orcid.org/0000-0002-0371-1815"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Depeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.0017000000225380063,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metabolomics","display_name":"Metabolomics","score":0.6466000080108643},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6014999747276306},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.49959999322891235},{"id":"https://openalex.org/keywords/tandem","display_name":"Tandem","score":0.46939998865127563},{"id":"https://openalex.org/keywords/tandem-mass-spectrometry","display_name":"Tandem mass spectrometry","score":0.4375999867916107},{"id":"https://openalex.org/keywords/characterization","display_name":"Characterization (materials science)","score":0.3993000090122223},{"id":"https://openalex.org/keywords/mass-spectrometry","display_name":"Mass spectrometry","score":0.35989999771118164}],"concepts":[{"id":"https://openalex.org/C21565614","wikidata":"https://www.wikidata.org/wiki/Q12149006","display_name":"Metabolomics","level":2,"score":0.6466000080108643},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6014999747276306},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5169000029563904},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5076000094413757},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.49959999322891235},{"id":"https://openalex.org/C2777814067","wikidata":"https://www.wikidata.org/wiki/Q1752317","display_name":"Tandem","level":2,"score":0.46939998865127563},{"id":"https://openalex.org/C31827203","wikidata":"https://www.wikidata.org/wiki/Q874296","display_name":"Tandem mass spectrometry","level":3,"score":0.4375999867916107},{"id":"https://openalex.org/C2780841128","wikidata":"https://www.wikidata.org/wiki/Q5073781","display_name":"Characterization (materials science)","level":2,"score":0.3993000090122223},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.374099999666214},{"id":"https://openalex.org/C162356407","wikidata":"https://www.wikidata.org/wiki/Q180809","display_name":"Mass spectrometry","level":2,"score":0.35989999771118164},{"id":"https://openalex.org/C118530786","wikidata":"https://www.wikidata.org/wiki/Q1134732","display_name":"Instrumentation (computer programming)","level":2,"score":0.3573000133037567},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3151000142097473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3118000030517578},{"id":"https://openalex.org/C64489805","wikidata":"https://www.wikidata.org/wiki/Q873864","display_name":"Liquid chromatography\u2013mass spectrometry","level":3,"score":0.30410000681877136},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.2994999885559082},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.2892000079154968},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2793000042438507},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2630000114440918}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00941","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Liquid":[0],"chromatography":[1],"mass":[2,55,121],"spectrometry":[3],"(LC-MS)-based":[4],"metabolomics":[5,104],"and":[6,25,28,34,41,53,98,105,131,140,164,185],"exposomics":[7,42],"aim":[8],"to":[9,115,188],"measure":[10],"detectable":[11],"small":[12],"molecules":[13],"in":[14],"biological":[15],"samples.":[16],"The":[17,174],"results":[18],"facilitate":[19],"hypothesis-generating":[20],"discovery":[21],"of":[22,51,58,63,103,109,125,153,161,172],"metabolic":[23],"changes":[24],"disease":[26],"mechanisms":[27],"provide":[29],"information":[30],"about":[31],"environmental":[32],"exposures":[33],"their":[35],"effects":[36],"on":[37,119,194],"human":[38],"health.":[39],"Metabolomics":[40],"are":[43,83],"made":[44],"possible":[45],"by":[46,92],"the":[47,64,87,100,154,159],"high":[48,54],"resolving":[49],"power":[50],"LC":[52],"measurement":[56],"accuracy":[57,160],"MS.":[59,195],"However,":[60],"a":[61,107,150],"majority":[62],"signals":[65],"from":[66,85],"such":[67],"studies":[68],"still":[69],"cannot":[70],"be":[71],"identified":[72,182],"or":[73],"annotated":[74],"using":[75],"conventional":[76],"library":[77],"searching":[78],"because":[79],"existing":[80],"spectral":[81],"libraries":[82],"far":[84],"covering":[86],"vast":[88],"chemical":[89],"space":[90],"captured":[91],"LC-MS/MS.":[93],"To":[94,133],"address":[95],"this":[96],"challenge":[97],"unleash":[99],"full":[101],"potential":[102],"exposomics,":[106],"number":[108],"computational":[110],"approaches":[111,127],"have":[112,147,177],"been":[113],"developed":[114],"predict":[116],"compounds":[117],"based":[118,193],"tandem":[120],"spectra.":[122],"Published":[123],"assessment":[124],"these":[126],"used":[128],"different":[129,170],"datasets":[130],"evaluation.":[132],"select":[134],"prediction":[135,156,163,166],"workflows":[136],"for":[137,143,169],"practical":[138],"applications":[139],"identify":[141],"areas":[142],"further":[144,189],"improvements,":[145],"we":[146],"carried":[148],"out":[149],"systematic":[151],"evaluation":[152],"state-of-the-art":[155],"algorithms.":[157],"Specifically,":[158],"formula":[162],"structure":[165],"was":[167],"evaluated":[168],"types":[171],"adducts.":[173],"resulting":[175],"findings":[176],"established":[178],"realistic":[179],"performance":[180],"baselines,":[181],"critical":[183],"bottlenecks,":[184],"provided":[186],"guidance":[187],"improve":[190],"compound":[191],"predictions":[192]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-08T00:00:00"}
