{"id":"https://openalex.org/W2021385006","doi":"https://doi.org/10.1142/s0219720003000216","title":"DATA MINING TOOLS FOR BIOLOGICAL SEQUENCES","display_name":"DATA MINING TOOLS FOR BIOLOGICAL SEQUENCES","publication_year":2003,"publication_date":"2003-04-01","ids":{"openalex":"https://openalex.org/W2021385006","doi":"https://doi.org/10.1142/s0219720003000216","mag":"2021385006","pmid":"https://pubmed.ncbi.nlm.nih.gov/15290785"},"language":"en","primary_location":{"id":"doi:10.1142/s0219720003000216","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720003000216","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071288718","display_name":"Huiqing Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"HUIQING LIU","raw_affiliation_strings":["Institute for Infocomm Research, 21 Heng Mui Keng Terrace, Singapore 119613, Singapore","Institute for Infocomm Research, 21, Heng Mui Keng Terrace Singapore 119613, Singapore#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, 21 Heng Mui Keng Terrace, Singapore 119613, Singapore","institution_ids":["https://openalex.org/I3005327000"]},{"raw_affiliation_string":"Institute for Infocomm Research, 21, Heng Mui Keng Terrace Singapore 119613, Singapore#TAB#","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012381970","display_name":"Limsoon Wong","orcid":"https://orcid.org/0000-0003-1241-5441"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"LIMSOON WONG","raw_affiliation_strings":["Institute for Infocomm Research, 21 Heng Mui Keng Terrace, Singapore 119613, Singapore","Institute for Infocomm Research, 21, Heng Mui Keng Terrace Singapore 119613, Singapore#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, 21 Heng Mui Keng Terrace, Singapore 119613, Singapore","institution_ids":["https://openalex.org/I3005327000"]},{"raw_affiliation_string":"Institute for Infocomm Research, 21, Heng Mui Keng Terrace Singapore 119613, Singapore#TAB#","institution_ids":["https://openalex.org/I3005327000"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.0933,"has_fulltext":false,"cited_by_count":84,"citation_normalized_percentile":{"value":0.91850761,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"01","issue":"01","first_page":"139","last_page":"167"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7417903542518616},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5570409297943115},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5194393992424011},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.516718327999115},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.466824471950531},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.45996150374412537},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.44411152601242065},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44294896721839905},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4420984983444214},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4373754560947418},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3618152141571045},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.13808244466781616}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7417903542518616},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5570409297943115},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5194393992424011},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.516718327999115},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.466824471950531},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.45996150374412537},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.44411152601242065},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44294896721839905},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4420984983444214},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4373754560947418},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3618152141571045},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.13808244466781616},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010442","descriptor_name":"Peptide Chain Initiation, Translational","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010442","descriptor_name":"Peptide Chain Initiation, Translational","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010442","descriptor_name":"Peptide Chain Initiation, Translational","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012333","descriptor_name":"RNA, Messenger","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D012333","descriptor_name":"RNA, Messenger","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D012333","descriptor_name":"RNA, Messenger","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D014176","descriptor_name":"Protein Biosynthesis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014176","descriptor_name":"Protein Biosynthesis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014176","descriptor_name":"Protein Biosynthesis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017423","descriptor_name":"Sequence Analysis, RNA","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D017423","descriptor_name":"Sequence Analysis, RNA","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D017423","descriptor_name":"Sequence Analysis, RNA","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030561","descriptor_name":"Databases, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030561","descriptor_name":"Databases, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030561","descriptor_name":"Databases, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1142/s0219720003000216","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720003000216","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},{"id":"pmid:15290785","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/15290785","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of bioinformatics and computational biology","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":100,"referenced_works":["https://openalex.org/W35886515","https://openalex.org/W639252573","https://openalex.org/W1480376833","https://openalex.org/W1496612019","https://openalex.org/W1498183065","https://openalex.org/W1498436455","https://openalex.org/W1512098439","https://openalex.org/W1520669656","https://openalex.org/W1523793568","https://openalex.org/W1537443165","https://openalex.org/W1547515408","https://openalex.org/W1548934951","https://openalex.org/W1558136956","https://openalex.org/W1594031697","https://openalex.org/W1604938182","https://openalex.org/W1726370773","https://openalex.org/W1966316616","https://openalex.org/W1966701961","https://openalex.org/W1966849089","https://openalex.org/W1967610656","https://openalex.org/W1974146607","https://openalex.org/W1984390944","https://openalex.org/W1995886164","https://openalex.org/W1999134021","https://openalex.org/W2013136212","https://openalex.org/W2014374971","https://openalex.org/W2016189274","https://openalex.org/W2028663402","https://openalex.org/W2028892679","https://openalex.org/W2029441437","https://openalex.org/W2029657107","https://openalex.org/W2031077023","https://openalex.org/W2032870665","https://openalex.org/W2038812321","https://openalex.org/W2039056260","https://openalex.org/W2039680623","https://openalex.org/W2041956242","https://openalex.org/W2044235819","https://openalex.org/W2045348184","https://openalex.org/W2049661143","https://openalex.org/W2051201953","https://openalex.org/W2052729310","https://openalex.org/W2054505252","https://openalex.org/W2056112526","https://openalex.org/W2058044724","https://openalex.org/W2058426472","https://openalex.org/W2059853480","https://openalex.org/W2064075965","https://openalex.org/W2081530255","https://openalex.org/W2084748297","https://openalex.org/W2090388864","https://openalex.org/W2091161312","https://openalex.org/W2092127696","https://openalex.org/W2096329971","https://openalex.org/W2100018338","https://openalex.org/W2101530945","https://openalex.org/W2102122585","https://openalex.org/W2102794349","https://openalex.org/W2108728387","https://openalex.org/W2109293916","https://openalex.org/W2109363337","https://openalex.org/W2110489138","https://openalex.org/W2110511794","https://openalex.org/W2113177393","https://openalex.org/W2114281975","https://openalex.org/W2114332734","https://openalex.org/W2119423166","https://openalex.org/W2129095580","https://openalex.org/W2131459517","https://openalex.org/W2132434674","https://openalex.org/W2134497952","https://openalex.org/W2138218344","https://openalex.org/W2139212933","https://openalex.org/W2140190241","https://openalex.org/W2142819948","https://openalex.org/W2144227498","https://openalex.org/W2145991872","https://openalex.org/W2149429041","https://openalex.org/W2149706766","https://openalex.org/W2152180708","https://openalex.org/W2153476503","https://openalex.org/W2156909104","https://openalex.org/W2162301608","https://openalex.org/W2167872870","https://openalex.org/W2168604934","https://openalex.org/W2171265988","https://openalex.org/W2283504545","https://openalex.org/W2401225555","https://openalex.org/W2407309269","https://openalex.org/W2555756618","https://openalex.org/W2800394774","https://openalex.org/W3119651796","https://openalex.org/W3147254695","https://openalex.org/W4211050817","https://openalex.org/W4214550983","https://openalex.org/W4245668478","https://openalex.org/W4250052351","https://openalex.org/W4251102884","https://openalex.org/W4301208911","https://openalex.org/W6684065032"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W2394466068","https://openalex.org/W3151736118","https://openalex.org/W1987683558","https://openalex.org/W4362495644","https://openalex.org/W2726838704","https://openalex.org/W2962780935","https://openalex.org/W2537862391","https://openalex.org/W2417174640","https://openalex.org/W4308273529"],"abstract_inverted_index":{"We":[0,52,113,125,153],"describe":[1],"a":[2,43,93],"methodology,":[3],"as":[4,6,90,92],"well":[5,91],"some":[7],"related":[8],"data":[9],"mining":[10],"tools,":[11],"for":[12,57,136,165],"analyzing":[13],"sequence":[14,50],"data.":[15,51],"The":[16],"methodology":[17,116],"comprises":[18],"three":[19,61],"steps:":[20],"(a)":[21],"generating":[22,64],"candidate":[23,65],"features":[24,31,40,72,132,160],"from":[25,32],"the":[26,33,38,75,118,138],"sequences,":[27],"(b)":[28],"selecting":[29,80],"relevant":[30,55,81],"candidates,":[34],"and":[35,87,110,130,148],"(c)":[36],"integrating":[37,99],"selected":[39,100],"to":[41,45,128,157,161],"build":[42,162],"system":[44],"recognize":[46],"specific":[47],"properties":[48],"in":[49,170],"also":[53,154],"give":[54],"techniques":[56],"each":[58],"of":[59,71,77,120],"these":[60],"steps.":[62],"For":[63,79,98],"features,":[66,82,101],"we":[67,83,102],"present":[68],"various":[69],"types":[70],"based":[73],"on":[74,117],"idea":[76],"k-grams.":[78],"discuss":[84,126,155],"signal-to-noise,":[85],"t-statistics,":[86],"entropy":[88],"measures,":[89],"correlation-based":[94],"feature":[95],"selection":[96],"method.":[97],"use":[103,158],"machine":[104],"learning":[105],"methods,":[106],"including":[107],"C4.5,":[108],"SVM,":[109],"Naive":[111],"Bayes.":[112],"illustrate":[114],"this":[115],"problem":[119],"recognizing":[121,166],"translation":[122,145,167],"initiation":[123,146,168],"sites.":[124],"how":[127,156],"generate":[129],"select":[131],"that":[133,143,150],"are":[134,144,151],"useful":[135],"understanding":[137],"distinction":[139],"between":[140],"ATG":[141],"sites":[142,147,169],"those":[149],"not.":[152],"such":[159],"reliable":[163],"systems":[164],"DNA":[171],"sequences.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":4},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
