{"id":"https://openalex.org/W7134835957","doi":"https://doi.org/10.48550/arxiv.2603.07710","title":"Reverse Distillation: Consistently Scaling Protein Language Model Representations","display_name":"Reverse Distillation: Consistently Scaling Protein Language Model Representations","publication_year":2026,"publication_date":"2026-03-08","ids":{"openalex":"https://openalex.org/W7134835957","doi":"https://doi.org/10.48550/arxiv.2603.07710"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07710","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039033609","display_name":"Darius Catrina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Catrina, Darius","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128663104","display_name":"Christian Bepler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bepler, Christian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020346838","display_name":"Samuel Sledzieski","orcid":"https://orcid.org/0000-0002-0170-3029"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sledzieski, Samuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128665848","display_name":"Rohit Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Rohit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.3264999985694885,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.3264999985694885,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.1835000067949295,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.73089998960495},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.667900025844574},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6456000208854675},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5789999961853027},{"id":"https://openalex.org/keywords/linear-subspace","display_name":"Linear subspace","score":0.5738000273704529},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46790000796318054},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.34060001373291016},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3382999897003174}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.73089998960495},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.667900025844574},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6456000208854675},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6241999864578247},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5789999961853027},{"id":"https://openalex.org/C12362212","wikidata":"https://www.wikidata.org/wiki/Q728435","display_name":"Linear subspace","level":2,"score":0.5738000273704529},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.5167999863624573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5081999897956848},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46790000796318054},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4293999969959259},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3409999907016754},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C91682802","wikidata":"https://www.wikidata.org/wiki/Q620538","display_name":"Multidimensional scaling","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25619998574256897}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07710","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07710","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07710","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07710","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Unlike":[0],"the":[1,23,37,40,62,73,84,87,132,145,150,157],"predictable":[2],"scaling":[3,169],"laws":[4],"in":[5,30,39],"natural":[6],"language":[7,13],"processing":[8],"and":[9,121,173],"computer":[10],"vision,":[11],"protein":[12,113],"models":[14,21,34,60,95,175],"(PLMs)":[15],"scale":[16],"poorly:":[17],"for":[18],"many":[19],"tasks,":[20],"within":[22],"same":[24,63,146],"family":[25,167],"plateau":[26],"or":[27],"even":[28],"decrease":[29],"performance,":[31],"with":[32,149],"mid-sized":[33],"often":[35],"outperforming":[36],"largest":[38],"family.":[41,64],"We":[42],"introduce":[43],"Reverse":[44,115],"Distillation,":[45],"a":[46,69,78],"principled":[47],"framework":[48,161],"that":[49,92,104],"decomposes":[50],"large":[51],"PLM":[52],"representations":[53],"into":[54],"orthogonal":[55],"subspaces":[56],"guided":[57],"by":[58,108],"smaller":[59,88,98,105],"of":[61,77],"The":[65],"resulting":[66],"embeddings":[67],"have":[68],"nested,":[70],"Matryoshka-style":[71],"structure:":[72],"first":[74],"k":[75],"dimensions":[76],"larger":[79,93,127],"model's":[80],"embedding":[81,147],"are":[82,176],"exactly":[83],"representation":[85],"from":[86,126],"model.":[89],"This":[90],"ensures":[91],"reverse-distilled":[94,137,151],"consistently":[96],"outperform":[97,140],"ones.":[99],"A":[100],"motivating":[101],"intuition":[102],"is":[103,162],"models,":[106,128],"constrained":[107],"capacity,":[109],"preferentially":[110],"encode":[111],"broadly-shared":[112],"features.":[114],"distillation":[116],"isolates":[117],"these":[118],"shared":[119],"features":[120],"orthogonally":[122],"extracts":[123],"additional":[124],"contributions":[125],"preventing":[129],"interference":[130],"between":[131],"two.":[133],"On":[134],"ProteinGym":[135],"benchmarks,":[136],"ESM-2":[138],"variants":[139],"their":[141],"respective":[142],"baselines":[143],"at":[144,178],"dimensionality,":[148],"15":[152],"billion":[153],"parameter":[154],"model":[155,166],"achieving":[156],"strongest":[158],"performance.":[159],"Our":[160],"generalizable":[163],"to":[164],"any":[165],"where":[168],"challenges":[170],"persist.":[171],"Code":[172],"trained":[174],"available":[177],"https://github.com/rohitsinghlab/plm_reverse_distillation.":[179]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
