{"id":"https://openalex.org/W2143559289","doi":"https://doi.org/10.1109/cibcb.2008.4675782","title":"Mining sequence features for DNA-binding site prediction","display_name":"Mining sequence features for DNA-binding site prediction","publication_year":2008,"publication_date":"2008-09-01","ids":{"openalex":"https://openalex.org/W2143559289","doi":"https://doi.org/10.1109/cibcb.2008.4675782","mag":"2143559289"},"language":"en","primary_location":{"id":"doi:10.1109/cibcb.2008.4675782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cibcb.2008.4675782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101421877","display_name":"Jing Hu","orcid":"https://orcid.org/0000-0003-1348-8773"},"institutions":[{"id":"https://openalex.org/I121980950","display_name":"Utah State University","ror":"https://ror.org/00h6set76","country_code":"US","type":"education","lineage":["https://openalex.org/I121980950"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jing Hu","raw_affiliation_strings":["Department of Computer Science, Utah State University, Logan, UT, USA","Dept. of Comput. Sci., Utah State Univ., Logan, UT"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Utah State University, Logan, UT, USA","institution_ids":["https://openalex.org/I121980950"]},{"raw_affiliation_string":"Dept. of Comput. Sci., Utah State Univ., Logan, UT","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100871727","display_name":"Changhui Yan","orcid":"https://orcid.org/0000-0003-3048-8628"},"institutions":[{"id":"https://openalex.org/I121980950","display_name":"Utah State University","ror":"https://ror.org/00h6set76","country_code":"US","type":"education","lineage":["https://openalex.org/I121980950"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changhui Yan","raw_affiliation_strings":["Department of Computer Science, Utah State University, Logan, UT, USA","Dept. of Comput. Sci., Utah State Univ., Logan, UT"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Utah State University, Logan, UT, USA","institution_ids":["https://openalex.org/I121980950"]},{"raw_affiliation_string":"Dept. of Comput. Sci., Utah State Univ., Logan, UT","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101421877"],"corresponding_institution_ids":["https://openalex.org/I121980950"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.12564151,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":"6","issue":null,"first_page":"219","last_page":"222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dna-binding-site","display_name":"DNA binding site","score":0.5817639827728271},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.5573738217353821},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5363065004348755},{"id":"https://openalex.org/keywords/matthews-correlation-coefficient","display_name":"Matthews correlation coefficient","score":0.5170919895172119},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5104554891586304},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.48624861240386963},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4839251637458801},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.47983261942863464},{"id":"https://openalex.org/keywords/hmg-box","display_name":"HMG-box","score":0.449192076921463},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.4136894941329956},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3715144395828247},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3545801639556885},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.3343198597431183},{"id":"https://openalex.org/keywords/dna-binding-protein","display_name":"DNA-binding protein","score":0.33384037017822266},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3213048577308655},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.32014715671539307},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.3122336268424988},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.11865001916885376},{"id":"https://openalex.org/keywords/transcription-factor","display_name":"Transcription factor","score":0.10923543572425842}],"concepts":[{"id":"https://openalex.org/C3662595","wikidata":"https://www.wikidata.org/wiki/Q5205743","display_name":"DNA binding site","level":5,"score":0.5817639827728271},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.5573738217353821},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5363065004348755},{"id":"https://openalex.org/C164085508","wikidata":"https://www.wikidata.org/wiki/Q4811327","display_name":"Matthews correlation coefficient","level":3,"score":0.5170919895172119},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5104554891586304},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48624861240386963},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4839251637458801},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.47983261942863464},{"id":"https://openalex.org/C5179208","wikidata":"https://www.wikidata.org/wiki/Q3782057","display_name":"HMG-box","level":5,"score":0.449192076921463},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.4136894941329956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3715144395828247},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3545801639556885},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.3343198597431183},{"id":"https://openalex.org/C94966510","wikidata":"https://www.wikidata.org/wiki/Q2252764","display_name":"DNA-binding protein","level":4,"score":0.33384037017822266},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3213048577308655},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.32014715671539307},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.3122336268424988},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.11865001916885376},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.10923543572425842},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C101762097","wikidata":"https://www.wikidata.org/wiki/Q224093","display_name":"Promoter","level":4,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cibcb.2008.4675782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cibcb.2008.4675782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1491553314","https://openalex.org/W1543773936","https://openalex.org/W1547246444","https://openalex.org/W1570448133","https://openalex.org/W1982178616","https://openalex.org/W1989075680","https://openalex.org/W2005423065","https://openalex.org/W2014731953","https://openalex.org/W2016907728","https://openalex.org/W2027473831","https://openalex.org/W2052546458","https://openalex.org/W2096986143","https://openalex.org/W2097697746","https://openalex.org/W2099254366","https://openalex.org/W2099414888","https://openalex.org/W2102864597","https://openalex.org/W2107432340","https://openalex.org/W2130479394","https://openalex.org/W2136832587","https://openalex.org/W2153153865","https://openalex.org/W2166248605","https://openalex.org/W6629497997"],"related_works":["https://openalex.org/W2586219588","https://openalex.org/W2030922238","https://openalex.org/W1870851597","https://openalex.org/W2099901107","https://openalex.org/W1776902257","https://openalex.org/W2148072770","https://openalex.org/W1926921733","https://openalex.org/W1974642429","https://openalex.org/W2028056900","https://openalex.org/W2052894840"],"abstract_inverted_index":{"Protein-DNA":[0],"interactions":[1],"play":[2],"pivotal":[3],"roles":[4],"in":[5,34],"gene":[6],"regulation":[7],"and":[8,11],"DNA":[9],"replication":[10],"repair.":[12],"Since":[13,97],"the":[14,52,68,100,109],"3-dimensional":[15],"structure":[16],"of":[17,54,64,99],"most":[18],"proteins":[19],"is":[20,83],"still":[21],"unknown,":[22],"computational":[23],"methods":[24],"which":[25,82],"can":[26,103,112],"identify":[27,113],"DNA-binding":[28,55,114],"sites":[29,115],"from":[30,61,106],"protein":[31,107,118],"sequences":[32,119],"are":[33,49],"demand.":[35],"In":[36],"this":[37],"study,":[38],"we":[39],"used":[40,91],"a":[41,62,72,87],"greedy":[42],"method":[43,75,89,111],"to":[44],"search":[45],"for":[46,51],"features":[47,58,94,102],"that":[48,90],"useful":[50],"identification":[53],"sites.":[56],"5":[57,70,101],"were":[59],"selected":[60,69],"pool":[63],"534":[65],"features.":[66],"Using":[67],"features,":[71],"Naive":[73],"Bayes":[74],"achieved":[76],"0.31":[77],"Matthews":[78],"correlation":[79],"coefficient":[80],"(MCC),":[81],"an":[84],"improvement":[85],"over":[86],"previous":[88],"only":[92,117],"2":[93],"as":[95,120],"input.":[96,121],"all":[98],"be":[104],"derived":[105],"sequences,":[108],"proposed":[110],"using":[116]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
