{"id":"https://openalex.org/W7128428985","doi":"https://doi.org/10.1371/journal.pcbi.1013925","title":"Trainable subnetworks reveal insights into structure knowledge organization in protein language models","display_name":"Trainable subnetworks reveal insights into structure knowledge organization in protein language models","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128428985","doi":"https://doi.org/10.1371/journal.pcbi.1013925","pmid":"https://pubmed.ncbi.nlm.nih.gov/41662462"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1013925","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1013925","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1371/journal.pcbi.1013925","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015655299","display_name":"Ria Vinod","orcid":"https://orcid.org/0000-0002-6282-9741"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ria Vinod","raw_affiliation_strings":["Center for Computational and Molecular Biology, Brown University, Providence, Rhode Island, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Computational and Molecular Biology, Brown University, Providence, Rhode Island, United States of America","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071306257","display_name":"Ava P. Amini","orcid":"https://orcid.org/0000-0002-8601-6040"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ava P. Amini","raw_affiliation_strings":["Microsoft Research, Cambridge, Massachusetts, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research, Cambridge, Massachusetts, United States of America","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123097120","display_name":"Lorin Crawford","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lorin Crawford","raw_affiliation_strings":["Microsoft Research, Cambridge, Massachusetts, United States of America"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research, Cambridge, Massachusetts, United States of America","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":null,"display_name":"Kevin K. Yang","orcid":"https://orcid.org/0000-0001-9045-6826"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin K. Yang","raw_affiliation_strings":["Microsoft Research, Cambridge, Massachusetts, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-9045-6826","affiliations":[{"raw_affiliation_string":"Microsoft Research, Cambridge, Massachusetts, United States of America","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16594712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"2","first_page":"e1013925","last_page":"e1013925"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.38260000944137573,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.38260000944137573,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.08320000022649765,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5669999718666077},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4239000082015991},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.3495999872684479},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.30239999294281006},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.2865000069141388},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.2831999957561493}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7303000092506409},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5669999718666077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5351999998092651},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5156999826431274},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4239000082015991},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33000001311302185},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C2992535486","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Language structure","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25029999017715454}],"mesh":[{"descriptor_ui":"D000098342","descriptor_name":"Large Language Models","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1371/journal.pcbi.1013925","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1013925","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:41662462","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41662462","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:12928587","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC12928587/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1013925","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1013925","pdf_url":null,"source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6809158325195312,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2008708467","https://openalex.org/W2108067237","https://openalex.org/W2980789587","https://openalex.org/W2995514860","https://openalex.org/W3040739508","https://openalex.org/W3119259767","https://openalex.org/W3133458480","https://openalex.org/W3146944767","https://openalex.org/W3173673636","https://openalex.org/W3177828909","https://openalex.org/W3179485843","https://openalex.org/W3191761521","https://openalex.org/W4286669150","https://openalex.org/W4309643848","https://openalex.org/W4327550249","https://openalex.org/W4386629205","https://openalex.org/W4391652655","https://openalex.org/W4400231442","https://openalex.org/W4403824354","https://openalex.org/W4404420622","https://openalex.org/W4404783271","https://openalex.org/W4409525624","https://openalex.org/W4412706621"],"related_works":[],"abstract_inverted_index":{"Protein":[0],"language":[1,8,56,146],"models":[2,32],"(PLMs)":[3],"pretrained":[4,166],"via":[5],"a":[6,15,60,159],"masked":[7],"modeling":[9,57,147],"objective":[10],"have":[11],"proven":[12],"effective":[13],"across":[14],"range":[16],"of":[17,63,80,176],"structure-related":[18],"tasks,":[19],"including":[20],"high-resolution":[21],"structure":[22,91,106,132,153],"prediction.":[23,107],"However,":[24],"it":[25],"remains":[26],"unclear":[27],"to":[28,116,137,172],"what":[29],"extent":[30],"these":[31,94],"factorize":[33],"protein":[34],"structural":[35,61,100],"categories":[36],"among":[37],"their":[38],"learned":[39,177],"parameters.":[40],"In":[41],"this":[42],"work,":[43],"we":[44,97,129],"introduce":[45],"trainable":[46],"subnetworks,":[47,96],"which":[48],"mask":[49],"out":[50],"the":[51,86,174],"PLM":[52,69,95,139,178],"weights":[53],"responsible":[54],"for":[55,161],"performance":[58,148],"on":[59],"category":[62],"proteins.":[64],"We":[65],"systematically":[66],"trained":[67],"39":[68],"subnetworks":[70],"targeting":[71],"both":[72],"sequence-":[73],"and":[74,89,119,141,168],"residue-level":[75],"features":[76,118],"at":[77],"varying":[78],"degrees":[79],"resolution":[81],"using":[82],"annotations":[83],"defined":[84],"by":[85],"CATH":[87],"taxonomy":[88],"secondary":[90],"elements.":[92],"Using":[93],"assessed":[98],"how":[99],"factorization":[101],"in":[102,145],"PLMs":[103,112,167],"influences":[104],"downstream":[105],"Our":[108,156],"results":[109],"show":[110],"that":[111,131,142],"are":[113],"highly":[114,135],"sensitive":[115],"sequence-level":[117],"can":[120,149,169],"predominantly":[121],"disentangle":[122],"extremely":[123],"coarse":[124],"or":[125],"fine-grained":[126],"information.":[127],"Furthermore,":[128],"observe":[130],"prediction":[133,154],"is":[134],"responsive":[136],"factorized":[138],"representations":[140,179],"small":[143],"changes":[144],"significantly":[150],"impair":[151],"PLM-based":[152],"capabilities.":[155],"work":[157],"presents":[158],"framework":[160],"studying":[162],"feature":[163],"entanglement":[164],"within":[165],"be":[170],"leveraged":[171],"improve":[173],"alignment":[175],"with":[180],"known":[181],"biological":[182],"concepts.":[183]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-10T00:00:00"}
