{"id":"https://openalex.org/W7127316945","doi":"https://doi.org/10.48550/arxiv.2602.01772","title":"DIA-CLIP: a universal representation learning framework for zero-shot DIA proteomics","display_name":"DIA-CLIP: a universal representation learning framework for zero-shot DIA proteomics","publication_year":2026,"publication_date":"2026-02-02","ids":{"openalex":"https://openalex.org/W7127316945","doi":"https://doi.org/10.48550/arxiv.2602.01772"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.01772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.01772","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124942040","display_name":"Yucheng Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liao, Yucheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124924349","display_name":"Han Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124918102","display_name":"Weinan E","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"E, Weinan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124942534","display_name":"Weijie Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weijie","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5124942040"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10683","display_name":"Mass Spectrometry Techniques and Applications","score":0.00559999980032444,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.7723000049591064},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6431000232696533},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.593999981880188},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5044999718666077},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4440000057220459},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4341000020503998},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3659999966621399}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.7723000049591064},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.708299994468689},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6431000232696533},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.593999981880188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5846999883651733},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5044999718666077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4740999937057495},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4440000057220459},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C2780616401","wikidata":"https://www.wikidata.org/wiki/Q1133673","display_name":"Cornerstone","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C46111723","wikidata":"https://www.wikidata.org/wiki/Q471857","display_name":"Proteomics","level":3,"score":0.3012999892234802},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.01772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.01772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data-independent":[0],"acquisition":[1],"mass":[2],"spectrometry":[3],"(DIA-MS)":[4],"has":[5],"established":[6],"itself":[7],"as":[8,139],"a":[9,58,85,115,123],"cornerstone":[10],"of":[11,152,158],"proteomic":[12],"profiling":[13],"and":[14,21,45,51,91,141,155],"large-scale":[15],"systems":[16],"biology,":[17],"offering":[18],"unparalleled":[19],"depth":[20,148],"reproducibility.":[22],"Current":[23],"DIA":[24,63],"analysis":[25,64],"frameworks,":[26],"however,":[27],"require":[28],"semi-supervised":[29,67],"training":[30,68],"within":[31],"each":[32],"run":[33],"for":[34,89,134],"peptide-spectrum":[35],"match":[36],"(PSM)":[37],"re-scoring.":[38],"This":[39],"approach":[40],"is":[41],"prone":[42],"to":[43,69,114],"overfitting":[44],"lacks":[46],"generalizability":[47],"across":[48,102],"diverse":[49,103,135],"species":[50],"experimental":[52],"conditions.":[53],"Here,":[54],"we":[55],"present":[56],"DIA-CLIP,":[57],"pre-trained":[59],"model":[60],"shifting":[61],"the":[62,150,156],"paradigm":[65],"from":[66],"universal":[70],"cross-modal":[71,87],"representation":[72,88],"learning.":[73],"By":[74],"integrating":[75],"dual-encoder":[76],"contrastive":[77],"learning":[78],"framework":[79],"with":[80],"encoder-decoder":[81],"architecture,":[82],"DIA-CLIP":[83,107,130],"establishes":[84],"unified":[86],"peptides":[90],"corresponding":[92],"spectral":[93],"features,":[94],"achieving":[95,122],"high-precision,":[96],"zero-shot":[97],"PSM":[98],"inference.":[99],"Extensive":[100],"evaluations":[101],"benchmarks":[104],"demonstrate":[105],"that":[106],"consistently":[108],"outperforms":[109],"state-of-the-art":[110],"tools,":[111],"yielding":[112],"up":[113],"45%":[116],"increase":[117],"in":[118,126],"protein":[119],"identification":[120,147],"while":[121],"12%":[124],"reduction":[125],"entrapment":[127],"identifications.":[128],"Moreover,":[129],"holds":[131],"immense":[132],"potential":[133],"practical":[136],"applications,":[137],"such":[138],"single-cell":[140],"spatial":[142],"proteomics,":[143],"where":[144],"its":[145],"enhanced":[146],"facilitates":[149],"discovery":[151],"novel":[153],"biomarkers":[154],"elucidates":[157],"intricate":[159],"cellular":[160],"mechanisms.":[161]},"counts_by_year":[],"updated_date":"2026-02-04T23:14:21.375766","created_date":"2026-02-04T00:00:00"}
