{"id":"https://openalex.org/W2020016327","doi":"https://doi.org/10.1109/bibe.2007.4375583","title":"Assessing the Performance of Macromolecular Sequence Classifiers","display_name":"Assessing the Performance of Macromolecular Sequence Classifiers","publication_year":2007,"publication_date":"2007-10-01","ids":{"openalex":"https://openalex.org/W2020016327","doi":"https://doi.org/10.1109/bibe.2007.4375583","mag":"2020016327"},"language":"en","primary_location":{"id":"doi:10.1109/bibe.2007.4375583","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibe.2007.4375583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 IEEE 7th International Symposium on BioInformatics and BioEngineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089085275","display_name":"Cornelia Caragea","orcid":"https://orcid.org/0000-0002-5664-2163"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cornelia Caragea","raw_affiliation_strings":["Computer Science Department, Iowa State University, Ames, IA, USA","Iowa State Univ. Ames, Ames"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]},{"raw_affiliation_string":"Iowa State Univ. Ames, Ames","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059212363","display_name":"Jivko Sinapov","orcid":"https://orcid.org/0000-0003-4852-026X"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jivko Sinapov","raw_affiliation_strings":["Computer Science Department, Iowa State University, Ames, IA, USA","Iowa State Univ. Ames, Ames"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]},{"raw_affiliation_string":"Iowa State Univ. Ames, Ames","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004737962","display_name":"Vasant Honavar","orcid":"https://orcid.org/0000-0001-5399-3489"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vasant Honavar","raw_affiliation_strings":["Computer Science Department, Iowa State University, Ames, IA, USA","Iowa State Univ. Ames, Ames"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]},{"raw_affiliation_string":"Iowa State Univ. Ames, Ames","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074103258","display_name":"Drena Dobbs","orcid":"https://orcid.org/0000-0003-4404-9554"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Drena Dobbs","raw_affiliation_strings":["Department of Genetics and Cell Biology, Iowa State University, Ames, IA, USA","Department of Genetics and Cell Biology, Iowa State University, Ames, Iowa, USA. Email: ddobbs@iastate.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Genetics and Cell Biology, Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]},{"raw_affiliation_string":"Department of Genetics and Cell Biology, Iowa State University, Ames, Iowa, USA. Email: ddobbs@iastate.edu","institution_ids":["https://openalex.org/I173911158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2113,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.7730465,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"1","issue":null,"first_page":"320","last_page":"326"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7045392394065857},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6826539039611816},{"id":"https://openalex.org/keywords/cross-validation","display_name":"Cross-validation","score":0.6302816867828369},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6300084590911865},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6251739859580994},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6113176345825195},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6104856729507446},{"id":"https://openalex.org/keywords/matthews-correlation-coefficient","display_name":"Matthews correlation coefficient","score":0.5569474697113037},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4803265631198883},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4731907844543457},{"id":"https://openalex.org/keywords/performance-prediction","display_name":"Performance prediction","score":0.4585835933685303},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4473527669906616},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.42831653356552124},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.41644272208213806},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.23926445841789246},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.10553398728370667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7045392394065857},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6826539039611816},{"id":"https://openalex.org/C27181475","wikidata":"https://www.wikidata.org/wiki/Q541014","display_name":"Cross-validation","level":2,"score":0.6302816867828369},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6300084590911865},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6251739859580994},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6113176345825195},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6104856729507446},{"id":"https://openalex.org/C164085508","wikidata":"https://www.wikidata.org/wiki/Q4811327","display_name":"Matthews correlation coefficient","level":3,"score":0.5569474697113037},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4803265631198883},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4731907844543457},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.4585835933685303},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4473527669906616},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.42831653356552124},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.41644272208213806},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.23926445841789246},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.10553398728370667},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/bibe.2007.4375583","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibe.2007.4375583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2007 IEEE 7th International Symposium on BioInformatics and BioEngineering","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.117.2649","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.117.2649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.iastate.edu/~cornelia/papersC/BIBE07/corneliaBIBE08072007.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.162.1428","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.162.1428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.iastate.edu/~cornelia/papers/bibe07.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.72.8945","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.72.8945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.iastate.edu/~honavar/Papers/BIBE2007.pdf","raw_type":"text"},{"id":"pmh:oai:dr.lib.iastate.edu:20.500.12876/3wxa2Wxv","is_oa":false,"landing_page_url":"https://dr.lib.iastate.edu/handle/20.500.12876/3wxa2Wxv","pdf_url":null,"source":{"id":"https://openalex.org/S4377196104","display_name":"Iowa State University Digital Repository (Iowa State University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I173911158","host_organization_name":"Iowa State University","host_organization_lineage":["https://openalex.org/I173911158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/BIBE.2007.4375583","raw_type":"Presentation"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1488329660","https://openalex.org/W1491553314","https://openalex.org/W1517113043","https://openalex.org/W1529355025","https://openalex.org/W1534477342","https://openalex.org/W1580142630","https://openalex.org/W2008708467","https://openalex.org/W2031772330","https://openalex.org/W2044775131","https://openalex.org/W2084787613","https://openalex.org/W2104244040","https://openalex.org/W2107432340","https://openalex.org/W2119423166","https://openalex.org/W2130479394","https://openalex.org/W2139212933","https://openalex.org/W2148603752","https://openalex.org/W2153153865","https://openalex.org/W2155239883","https://openalex.org/W2164412127","https://openalex.org/W2164834993","https://openalex.org/W2165951927","https://openalex.org/W2168272116","https://openalex.org/W4254721730","https://openalex.org/W6635058605","https://openalex.org/W6684448134"],"related_works":["https://openalex.org/W816105089","https://openalex.org/W4309048734","https://openalex.org/W2381029635","https://openalex.org/W1556261580","https://openalex.org/W2976286905","https://openalex.org/W2389646300","https://openalex.org/W2093725103","https://openalex.org/W4239059384","https://openalex.org/W2020016327","https://openalex.org/W2564588636"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"approaches":[2,9],"offer":[3],"some":[4,68],"of":[5,20,28,44,51,59,82,86,117,158,191],"the":[6,26,37,45,57,74,80,84,172,180],"most":[7],"cost-effective":[8],"to":[10],"building":[11],"predictive":[12,47,87],"models":[13],"(e.g.,":[14,39],"classifiers)":[15],"in":[16,22,67],"a":[17],"broad":[18],"range":[19],"applications":[21],"computational":[23],"biology.":[24],"Comparing":[25],"effectiveness":[27],"different":[29,60,71,156],"algorithms":[30],"requires":[31],"reliable":[32],"procedures":[33,65,110,149],"for":[34,73],"accurately":[35],"assessing":[36,83],"performance":[38,76,85,161,181,192],"accuracy,":[40,165],"sensitivity,":[41],"and":[42,63,66,99,107,123,139,146,150,169],"specificity)":[43],"resulting":[46],"classifiers.":[48],"The":[49],"difficulty":[50],"this":[52],"task":[53],"is":[54],"compounded":[55],"by":[56],"use":[58],"data":[61,100,151],"selection":[62,101,152],"evaluation":[64],"cases,":[69],"even":[70],"definitions":[72],"same":[75],"measures.":[77],"We":[78,177],"explore":[79],"problem":[81],"classifiers":[88,136],"trained":[89],"on":[90,97,111],"macromolecular":[91],"sequence":[92],"data,":[93],"with":[94,133],"an":[95],"emphasis":[96],"cross-validation":[98,109,148,186],"methods.":[102],"Specifically,":[103],"we":[104],"compare":[105],"sequence-based":[106,113,145,185],"window-based":[108,197],"three":[112],"prediction":[114],"tasks:":[115],"identification":[116],"glycosylation":[118],"sites,":[119],"RNA-Protein":[120],"interface":[121,125],"residues,":[122],"Protein-Protein":[124],"residues":[126],"from":[127],"amino":[128],"acid":[129],"sequence.":[130],"Our":[131],"experiments":[132],"two":[134],"representative":[135],"(Naive":[137],"Bayes":[138],"Support":[140],"Vector":[141],"Machine)":[142],"show":[143],"that":[144,179],"windows-based":[147],"methods":[153],"can":[154],"yield":[155],"estimates":[157,182,190],"commonly":[159],"used":[160],"measures":[162],"such":[163],"as":[164],"Matthews":[166],"correlation":[167],"coefficient":[168],"area":[170],"under":[171],"Receiver":[173],"Operating":[174],"Characteristic":[175],"curve.":[176],"argue":[178],"obtained":[183,195],"using":[184,196],"provide":[187],"more":[188],"realistic":[189],"than":[193],"those":[194],"cross-validation.":[198]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
