{"id":"https://openalex.org/W7160273116","doi":"https://doi.org/10.48550/arxiv.2605.01945","title":"PepSpecBench: A Unified Evaluation Benchmark for Peptide Tandem Mass Spectrometry Prediction","display_name":"PepSpecBench: A Unified Evaluation Benchmark for Peptide Tandem Mass Spectrometry Prediction","publication_year":2026,"publication_date":"2026-05-03","ids":{"openalex":"https://openalex.org/W7160273116","doi":"https://doi.org/10.48550/arxiv.2605.01945"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.01945","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027255549","display_name":"Zhiwen Yang","orcid":"https://orcid.org/0000-0002-6357-3194"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhiwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135343166","display_name":"Pan Liu","orcid":"https://orcid.org/0009-0003-7919-5110"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Pan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135333372","display_name":"Yifan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114221337","display_name":"Yunhua Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Yunhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135370659","display_name":"Jun Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10683","display_name":"Mass Spectrometry Techniques and Applications","score":0.00860000029206276,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.0027000000700354576,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7202000021934509},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.661899983882904},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6391000151634216},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46219998598098755},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.39250001311302185},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.35749998688697815},{"id":"https://openalex.org/keywords/data-driven","display_name":"Data-driven","score":0.33899998664855957}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7202000021934509},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6976000070571899},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.661899983882904},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6391000151634216},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.541100025177002},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46219998598098755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4343999922275543},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4309999942779541},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.39250001311302185},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C31827203","wikidata":"https://www.wikidata.org/wiki/Q874296","display_name":"Tandem mass spectrometry","level":3,"score":0.3093000054359436},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.47171491384506226}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Tandem":[0],"mass":[1],"spectrometry":[2],"provides":[3],"a":[4,24,108,125,139,147],"high-throughput":[5],"framework":[6],"for":[7,111,181],"identifying":[8],"and":[9,35,60,79,91,134,152,162,171,186],"quantifying":[10],"proteins":[11],"in":[12],"complex":[13],"biological":[14],"samples.":[15],"In":[16],"computational":[17],"proteomics,":[18],"predicting":[19],"peptide":[20,33,112],"MS/MS":[21,113],"spectra":[22],"is":[23],"critical":[25],"task,":[26],"enabling":[27],"downstream":[28],"applications":[29],"such":[30],"as":[31],"large-scale":[32],"identification":[34],"quantification.":[36],"While":[37],"deep":[38],"learning":[39],"architectures":[40,137],"have":[41],"substantially":[42],"improved":[43],"prediction":[44],"accuracy,":[45],"three":[46],"evaluation":[47,150,185],"challenges":[48],"obscure":[49],"the":[50,54],"true":[51],"progress":[52],"of":[53,94],"field.":[55],"First,":[56],"inconsistent":[57],"data":[58,71,118],"preprocessing":[59,119],"incompatible":[61],"model":[62,67,95,160,183],"output":[63],"spaces":[64],"hinder":[65],"fair":[66],"comparison.":[68],"Second,":[69],"flawed":[70],"splitting":[72,128],"strategies":[73],"can":[74],"permit":[75],"hidden":[76],"sequence":[77,132],"leakage":[78],"inflate":[80],"reported":[81],"performance.":[82],"Third,":[83],"existing":[84],"evaluations":[85],"typically":[86],"lack":[87],"comprehensive":[88,148],"cross-species":[89],"benchmarking":[90],"systematic":[92],"assessment":[93],"robustness":[96,161,172],"to":[97,130,158],"influential":[98],"experimental":[99],"conditions.":[100],"To":[101],"address":[102],"these":[103],"challenges,":[104],"we":[105],"propose":[106],"PepSpecBench,":[107],"unified":[109],"benchmark":[110],"spectrum":[114],"prediction.":[115],"PepSpecBench":[116],"standardizes":[117],"across":[120,174],"complementary":[121],"public":[122],"datasets,":[123],"enforces":[124],"strict":[126],"backbone-disjoint":[127],"strategy":[129],"eliminate":[131],"leakage,":[133],"evaluates":[135],"diverse":[136],"within":[138],"shared":[140],"fragment-ion":[141],"representation":[142],"space.":[143],"It":[144],"further":[145],"introduces":[146],"multi-species":[149],"suite":[151],"physically":[153],"grounded":[154],"metadata":[155],"perturbation":[156],"probes":[157],"assess":[159],"instrument":[163],"awareness.":[164],"We":[165],"uncover":[166],"previously":[167],"unrecognized":[168],"performance":[169],"discrepancies":[170],"limitations":[173],"six":[175],"representative":[176],"models,":[177],"providing":[178],"actionable":[179],"insights":[180],"future":[182],"design,":[184],"practical":[187],"deployment.":[188]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-06T00:00:00"}
