{"id":"https://openalex.org/W2138769522","doi":"https://doi.org/10.1371/journal.pcbi.1000567","title":"A Threading-Based Method for the Prediction of DNA-Binding Proteins with Application to the Human Genome","display_name":"A Threading-Based Method for the Prediction of DNA-Binding Proteins with Application to the Human Genome","publication_year":2009,"publication_date":"2009-11-12","ids":{"openalex":"https://openalex.org/W2138769522","doi":"https://doi.org/10.1371/journal.pcbi.1000567","mag":"2138769522"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1000567","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1000567","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1000567&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1000567&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000947559","display_name":"Mu Gao","orcid":"https://orcid.org/0000-0002-0378-3704"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mu Gao","raw_affiliation_strings":["Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, United States of America","Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, United States of America","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010808261","display_name":"Jeffrey Skolnick","orcid":"https://orcid.org/0000-0002-1877-4958"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jeffrey Skolnick","raw_affiliation_strings":["Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, United States of America"],"affiliations":[{"raw_affiliation_string":"Center for the Study of Systems Biology, School of Biology, Georgia Institute of Technology, Atlanta, Georgia, United States of America","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5010808261"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":1.8872,"has_fulltext":true,"cited_by_count":88,"citation_normalized_percentile":{"value":0.85531807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"5","issue":"11","first_page":"e1000567","last_page":"e1000567"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/threading","display_name":"Threading (protein sequence)","score":0.8665721416473389},{"id":"https://openalex.org/keywords/dna-binding-domain","display_name":"DNA-binding domain","score":0.6396815776824951},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6209520697593689},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.6160675287246704},{"id":"https://openalex.org/keywords/protein-function-prediction","display_name":"Protein function prediction","score":0.5402803421020508},{"id":"https://openalex.org/keywords/protein-structure-prediction","display_name":"Protein structure prediction","score":0.5179449915885925},{"id":"https://openalex.org/keywords/dna-binding-site","display_name":"DNA binding site","score":0.5008728504180908},{"id":"https://openalex.org/keywords/hmg-box","display_name":"HMG-box","score":0.4713178277015686},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.4358307123184204},{"id":"https://openalex.org/keywords/structural-genomics","display_name":"Structural genomics","score":0.4327201843261719},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.42032933235168457},{"id":"https://openalex.org/keywords/template","display_name":"Template","score":0.41816848516464233},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.3812047243118286},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.37945204973220825},{"id":"https://openalex.org/keywords/dna-binding-protein","display_name":"DNA-binding protein","score":0.3552341163158417},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.22491279244422913},{"id":"https://openalex.org/keywords/protein-function","display_name":"Protein function","score":0.19236892461776733},{"id":"https://openalex.org/keywords/biochemistry","display_name":"Biochemistry","score":0.19079190492630005},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.17445260286331177},{"id":"https://openalex.org/keywords/promoter","display_name":"Promoter","score":0.11955845355987549},{"id":"https://openalex.org/keywords/transcription-factor","display_name":"Transcription factor","score":0.08823627233505249}],"concepts":[{"id":"https://openalex.org/C200307862","wikidata":"https://www.wikidata.org/wiki/Q7797175","display_name":"Threading (protein sequence)","level":3,"score":0.8665721416473389},{"id":"https://openalex.org/C33987129","wikidata":"https://www.wikidata.org/wiki/Q13479514","display_name":"DNA-binding domain","level":4,"score":0.6396815776824951},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6209520697593689},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.6160675287246704},{"id":"https://openalex.org/C207060522","wikidata":"https://www.wikidata.org/wiki/Q7251473","display_name":"Protein function prediction","level":4,"score":0.5402803421020508},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.5179449915885925},{"id":"https://openalex.org/C3662595","wikidata":"https://www.wikidata.org/wiki/Q5205743","display_name":"DNA binding site","level":5,"score":0.5008728504180908},{"id":"https://openalex.org/C5179208","wikidata":"https://www.wikidata.org/wiki/Q3782057","display_name":"HMG-box","level":5,"score":0.4713178277015686},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4358307123184204},{"id":"https://openalex.org/C192772702","wikidata":"https://www.wikidata.org/wiki/Q2583975","display_name":"Structural genomics","level":3,"score":0.4327201843261719},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.42032933235168457},{"id":"https://openalex.org/C82714645","wikidata":"https://www.wikidata.org/wiki/Q438331","display_name":"Template","level":2,"score":0.41816848516464233},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.3812047243118286},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.37945204973220825},{"id":"https://openalex.org/C94966510","wikidata":"https://www.wikidata.org/wiki/Q2252764","display_name":"DNA-binding protein","level":4,"score":0.3552341163158417},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.22491279244422913},{"id":"https://openalex.org/C2986374874","wikidata":"https://www.wikidata.org/wiki/Q8054","display_name":"Protein function","level":3,"score":0.19236892461776733},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.19079190492630005},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.17445260286331177},{"id":"https://openalex.org/C101762097","wikidata":"https://www.wikidata.org/wiki/Q224093","display_name":"Promoter","level":4,"score":0.11955845355987549},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.08823627233505249},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1371/journal.pcbi.1000567","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1000567","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1000567&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7e1076811fe044eb97e6e10b5107e4ee","is_oa":true,"landing_page_url":"https://doaj.org/article/7e1076811fe044eb97e6e10b5107e4ee","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 5, Iss 11, p e1000567 (2009)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:2770119","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2770119","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1000567","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1000567","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1000567&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5600000023841858,"display_name":"Reduced inequalities"},{"id":"https://metadata.un.org/sdg/16","score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1053362271","display_name":null,"funder_award_id":"GM-37408","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2138769522.pdf","grobid_xml":"https://content.openalex.org/works/W2138769522.grobid-xml"},"referenced_works_count":70,"referenced_works":["https://openalex.org/W1491553314","https://openalex.org/W1543773936","https://openalex.org/W1845077221","https://openalex.org/W1886106832","https://openalex.org/W1963819736","https://openalex.org/W1970595416","https://openalex.org/W1972654719","https://openalex.org/W1974480392","https://openalex.org/W1974646727","https://openalex.org/W1975415672","https://openalex.org/W1986595328","https://openalex.org/W1990601349","https://openalex.org/W1991185062","https://openalex.org/W1998518361","https://openalex.org/W2015350814","https://openalex.org/W2016619116","https://openalex.org/W2022058405","https://openalex.org/W2027364181","https://openalex.org/W2029667189","https://openalex.org/W2031608406","https://openalex.org/W2039287748","https://openalex.org/W2042714852","https://openalex.org/W2043199972","https://openalex.org/W2048947602","https://openalex.org/W2059296675","https://openalex.org/W2061545358","https://openalex.org/W2064903987","https://openalex.org/W2070636049","https://openalex.org/W2078542633","https://openalex.org/W2079988374","https://openalex.org/W2085277871","https://openalex.org/W2086062742","https://openalex.org/W2098223336","https://openalex.org/W2099117602","https://openalex.org/W2100320834","https://openalex.org/W2102122585","https://openalex.org/W2102245393","https://openalex.org/W2102612168","https://openalex.org/W2102864597","https://openalex.org/W2103017472","https://openalex.org/W2103150692","https://openalex.org/W2108067237","https://openalex.org/W2108215151","https://openalex.org/W2109553965","https://openalex.org/W2113649367","https://openalex.org/W2115595474","https://openalex.org/W2119902292","https://openalex.org/W2121702291","https://openalex.org/W2125546020","https://openalex.org/W2126943194","https://openalex.org/W2128114769","https://openalex.org/W2128533758","https://openalex.org/W2130479394","https://openalex.org/W2140735973","https://openalex.org/W2141149420","https://openalex.org/W2141885858","https://openalex.org/W2144258433","https://openalex.org/W2144347309","https://openalex.org/W2148004368","https://openalex.org/W2152326664","https://openalex.org/W2154447604","https://openalex.org/W2155601193","https://openalex.org/W2156360673","https://openalex.org/W2156690214","https://openalex.org/W2158714788","https://openalex.org/W2160223331","https://openalex.org/W2161732286","https://openalex.org/W2612812586","https://openalex.org/W4210323379","https://openalex.org/W6675949863"],"related_works":["https://openalex.org/W1504467230","https://openalex.org/W2170471837","https://openalex.org/W2345114863","https://openalex.org/W2141001285","https://openalex.org/W2041583538","https://openalex.org/W1870851597","https://openalex.org/W1606742816","https://openalex.org/W2294631951","https://openalex.org/W2138769522","https://openalex.org/W2143222112"],"abstract_inverted_index":{"Diverse":[0],"mechanisms":[1],"for":[2,27,33,76,173,216,226],"DNA-protein":[3,96],"recognition":[4],"have":[5,249],"been":[6],"elucidated":[7],"in":[8,273],"numerous":[9],"atomic":[10],"complex":[11,97],"structures":[12],"from":[13,42,240],"various":[14],"protein":[15,43,85,238],"families.":[16],"These":[17],"structural":[18,190],"data":[19],"provide":[20],"an":[21,57,167],"invaluable":[22],"knowledge":[23],"base":[24],"not":[25,291],"only":[26,100],"understanding":[28],"DNAprotein":[29],"interactions,":[30],"but":[31],"also":[32],"developing":[34],"specialized":[35],"methods":[36,47],"that":[37,54,222,260,280],"predict":[38],"the":[39,61,77,101,140,154,180,188,213,241],"DNA-binding":[40,80,84,111,125,178,250],"function":[41,228],"structure.":[44],"While":[45],"such":[46],"are":[48,113,192,246,265],"useful,":[49],"a":[50,71,91,144,231,288],"major":[51],"limitation":[52],"is":[53,135,150,161,224,278],"they":[55],"require":[56],"experimental":[58,168,198],"structure":[59,169],"of":[60,79,95,146,176,187,196,262,282],"target":[62,102],"as":[63,115,170],"input.":[64,171],"To":[65,220],"overcome":[66],"this":[67],"obstacle,":[68],"we":[69,268],"develop":[70],"threading-based":[72],"method,":[73,88],"DNA-Binding-Domain-Threader":[74],"(DBD-Threader),":[75],"prediction":[78],"domains":[81,286],"and":[82,110,126,160],"associated":[83,202],"residues.":[86],"Our":[87],"which":[89,165],"uses":[90],"template":[92],"library":[93],"composed":[94],"structures,":[98,199],"requires":[99,166],"protein's":[103],"sequence.":[104],"In":[105,120,275],"our":[106,263],"approach,":[107],"fold":[108],"similarity":[109],"propensity":[112],"employed":[114],"two":[116],"functional":[117,289],"discriminating":[118],"properties.":[119],"benchmark":[121],"tests":[122],"on":[123,230],"179":[124],"3,797":[127],"non-DNA-binding":[128],"proteins,":[129],"using":[130],"templates":[131],"whose":[132],"sequence":[133,156],"identity":[134],"less":[136],"than":[137,153],"30%":[138],"to":[139,163,236,248,293],"target,":[141],"DBD-Threader":[142,210,223,233],"achieves":[143],"sensitivity/precision":[145],"56%/86%.":[147],"This":[148],"performance":[149],"considerably":[151],"better":[152],"standard":[155],"comparison":[157],"method":[158],"PSI-BLAST":[159],"comparable":[162],"DBD-Hunter,":[164],"Moreover,":[172],"over":[174],"70%":[175],"predicted":[177,218,247],"domains,":[179],"backbone":[181],"Root":[182],"Mean":[183],"Square":[184],"Deviations":[185],"(RMSDs)":[186],"top-ranked":[189],"models":[191],"within":[193],"6.5":[194],"A":[195],"their":[197,201],"with":[200,253],"DNAbinding":[203],"sites":[204],"identified":[205],"at":[206],"satisfactory":[207],"accuracy.":[208],"Additionally,":[209],"correctly":[211],"assigned":[212],"SCOP":[214],"superfamily":[215],"most":[217],"domains.":[219],"demonstrate":[221],"useful":[225],"automatic":[227],"annotation":[229],"large-scale,":[232],"was":[234],"applied":[235],"18,631":[237],"sequences":[239],"human":[242],"genome;":[243],"1,654":[244],"proteins":[245],"function.":[251],"Comparison":[252],"existing":[254],"Gene":[255],"Ontology":[256],"(GO)":[257],"annotations":[258],"suggests":[259],",30%":[261],"predictions":[264,272],"new.":[266],"Finally,":[267],"present":[269],"some":[270],"interesting":[271],"detail.":[274],"particular,":[276],"it":[277],"estimated":[279],",20%":[281],"classic":[283],"zinc":[284],"finger":[285],"play":[287],"role":[290],"related":[292],"direct":[294],"DNA-binding.":[295]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":8},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2016-06-24T00:00:00"}
