{"id":"https://openalex.org/W4417448167","doi":"https://doi.org/10.48550/arxiv.2512.12272","title":"Accurate de novo sequencing of the modified proteome with OmniNovo","display_name":"Accurate de novo sequencing of the modified proteome with OmniNovo","publication_year":2025,"publication_date":"2025-12-13","ids":{"openalex":"https://openalex.org/W4417448167","doi":"https://doi.org/10.48550/arxiv.2512.12272"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.12272","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12272","pdf_url":"https://arxiv.org/pdf/2512.12272","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.12272","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100650502","display_name":"Yuhan Chen","orcid":"https://orcid.org/0000-0002-2950-2675"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Yuhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110973527","display_name":"Shang Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Shang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101784724","display_name":"Zhiqiang Gao","orcid":"https://orcid.org/0000-0001-9103-6698"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Zhiqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035643880","display_name":"Yuejin Yang","orcid":"https://orcid.org/0000-0002-1309-2667"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yuejin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100368866","display_name":"Xiang Zhang","orcid":"https://orcid.org/0000-0002-5234-9410"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066371054","display_name":"Sheng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Sheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040228384","display_name":"Xinjie Mao","orcid":"https://orcid.org/0000-0001-9446-548X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Xinjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091148317","display_name":"Liujia Qian","orcid":"https://orcid.org/0000-0002-0870-7479"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Liujia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067111361","display_name":"Jiaqi Wei","orcid":"https://orcid.org/0009-0006-3268-2837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Jiaqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102891552","display_name":"Zijie Qiu","orcid":"https://orcid.org/0009-0007-2461-2641"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Zijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"You, Chenyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"You, Chenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028486493","display_name":"Lei Bai","orcid":"https://orcid.org/0000-0003-3378-7201"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Lei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037110401","display_name":"Ning Ding","orcid":"https://orcid.org/0000-0002-3689-0696"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Ning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002051017","display_name":"Tiannan Guo","orcid":"https://orcid.org/0000-0003-3869-7651"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Tiannan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004875850","display_name":"Bowen Zhou","orcid":"https://orcid.org/0000-0003-4421-9787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Bowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102815325","display_name":"Siqi Sun","orcid":"https://orcid.org/0000-0002-9592-3670"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Siqi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":["https://openalex.org/A5100650502"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.5256999731063843,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.5256999731063843,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.2621999979019165,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.07249999791383743,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/proteome","display_name":"Proteome","score":0.7092000246047974},{"id":"https://openalex.org/keywords/decipher","display_name":"DECIPHER","score":0.6288999915122986},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.6220999956130981},{"id":"https://openalex.org/keywords/human-proteome-project","display_name":"Human proteome project","score":0.5475000143051147},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4851999878883362},{"id":"https://openalex.org/keywords/false-discovery-rate","display_name":"False discovery rate","score":0.48399999737739563},{"id":"https://openalex.org/keywords/human-proteins","display_name":"Human proteins","score":0.44029998779296875},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.41280001401901245},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4007999897003174}],"concepts":[{"id":"https://openalex.org/C104397665","wikidata":"https://www.wikidata.org/wiki/Q860947","display_name":"Proteome","level":2,"score":0.7092000246047974},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.7084000110626221},{"id":"https://openalex.org/C164614171","wikidata":"https://www.wikidata.org/wiki/Q5204775","display_name":"DECIPHER","level":2,"score":0.6288999915122986},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.6220999956130981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5629000067710876},{"id":"https://openalex.org/C94795543","wikidata":"https://www.wikidata.org/wiki/Q3813754","display_name":"Human proteome project","level":4,"score":0.5475000143051147},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4851999878883362},{"id":"https://openalex.org/C193244246","wikidata":"https://www.wikidata.org/wiki/Q5432696","display_name":"False discovery rate","level":3,"score":0.48399999737739563},{"id":"https://openalex.org/C2911029443","wikidata":"https://www.wikidata.org/wiki/Q21296145","display_name":"Human proteins","level":3,"score":0.44029998779296875},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.41280001401901245},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4007999897003174},{"id":"https://openalex.org/C46111723","wikidata":"https://www.wikidata.org/wiki/Q471857","display_name":"Proteomics","level":3,"score":0.39989998936653137},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3756999969482422},{"id":"https://openalex.org/C145741570","wikidata":"https://www.wikidata.org/wiki/Q7251534","display_name":"Proteogenomics","level":5,"score":0.3488999903202057},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.335999995470047},{"id":"https://openalex.org/C191015642","wikidata":"https://www.wikidata.org/wiki/Q1132459","display_name":"Fragmentation (computing)","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C100631289","wikidata":"https://www.wikidata.org/wiki/Q898362","display_name":"Posttranslational modification","level":3,"score":0.31360000371932983},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.30410000681877136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2980000078678131},{"id":"https://openalex.org/C111364199","wikidata":"https://www.wikidata.org/wiki/Q2915896","display_name":"Protein methods","level":4,"score":0.2978000044822693},{"id":"https://openalex.org/C31827203","wikidata":"https://www.wikidata.org/wiki/Q874296","display_name":"Tandem mass spectrometry","level":3,"score":0.2777000069618225},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26739999651908875},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C197077220","wikidata":"https://www.wikidata.org/wiki/Q720988","display_name":"Human genome","level":4,"score":0.2524999976158142},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.12272","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12272","pdf_url":"https://arxiv.org/pdf/2512.12272","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.12272","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.12272","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.12272","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12272","pdf_url":"https://arxiv.org/pdf/2512.12272","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Post-translational":[0],"modifications":[1],"(PTMs)":[2],"serve":[3],"as":[4],"a":[5,19,32,50,86,92,114],"dynamic":[6],"chemical":[7],"language":[8],"regulating":[9],"protein":[10],"function,":[11],"yet":[12],"current":[13],"proteomic":[14],"methods":[15],"remain":[16],"blind":[17],"to":[18,72,81,123],"vast":[20],"portion":[21],"of":[22,35,41,58,133,141],"the":[23,39,120,130,134],"modified":[24,61],"proteome.":[25],"Standard":[26],"database":[27],"search":[28,36],"algorithms":[29],"suffer":[30],"from":[31,64],"combinatorial":[33],"explosion":[34],"spaces,":[37],"limiting":[38],"identification":[40],"uncharacterized":[42],"or":[43],"complex":[44],"modifications.":[45],"Here":[46],"we":[47],"introduce":[48],"OmniNovo,":[49],"unified":[51],"deep":[52],"learning":[53],"framework":[54],"for":[55],"reference-free":[56],"sequencing":[57],"unmodified":[59],"and":[60,136],"peptides":[62,109],"directly":[63],"tandem":[65],"mass":[66],"spectra.":[67],"Unlike":[68],"existing":[69],"tools":[70],"restricted":[71],"specific":[73],"modification":[74],"types,":[75],"OmniNovo":[76,102],"learns":[77],"universal":[78],"fragmentation":[79],"rules":[80],"decipher":[82],"diverse":[83],"PTMs":[84],"within":[85],"single":[87],"coherent":[88],"model.":[89],"By":[90],"integrating":[91],"mass-constrained":[93],"decoding":[94],"algorithm":[95],"with":[96],"rigorous":[97],"false":[98,116],"discovery":[99,117],"rate":[100],"estimation,":[101],"achieves":[103],"state-of-the-art":[104],"accuracy,":[105],"identifying":[106],"51\\%":[107],"more":[108],"than":[110],"standard":[111],"approaches":[112],"at":[113],"1\\%":[115],"rate.":[118],"Crucially,":[119],"model":[121],"generalizes":[122],"biological":[124],"sites":[125],"unseen":[126],"during":[127],"training,":[128],"illuminating":[129],"dark":[131],"matter":[132],"proteome":[135],"enabling":[137],"unbiased":[138],"comprehensive":[139],"analysis":[140],"cellular":[142],"regulation.":[143]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-12-17T00:00:00"}
