{"id":"https://openalex.org/W2406064843","doi":"https://doi.org/10.5220/0004806201960203","title":"Generating Features using Burrows Wheeler Transformation for Biological Sequence Classification","display_name":"Generating Features using Burrows Wheeler Transformation for Biological Sequence Classification","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2406064843","doi":"https://doi.org/10.5220/0004806201960203","mag":"2406064843"},"language":"en","primary_location":{"id":"doi:10.5220/0004806201960203","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0004806201960203","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Bioinformatics Models, Methods and Algorithms","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5220/0004806201960203","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071868223","display_name":"Karthik Tangirala","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karthik Tangirala","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5067341711","display_name":"Doina Caragea","orcid":"https://orcid.org/0000-0002-6440-0914"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doina Caragea","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5071868223"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2789,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60832921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"196","last_page":"203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9772999882698059,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.6582300066947937},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6285617351531982},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.5965997576713562},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.58689284324646},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5517275333404541},{"id":"https://openalex.org/keywords/biological-data","display_name":"Biological data","score":0.5447015762329102},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5379645824432373},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.511237621307373},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49747827649116516},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.4871313273906708},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4832090139389038},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4746582806110382},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.432176411151886},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3648308515548706},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34796279668807983},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23276662826538086},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.17349925637245178},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.1651972532272339},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.15431052446365356},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.08829337358474731}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.6582300066947937},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6285617351531982},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.5965997576713562},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.58689284324646},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5517275333404541},{"id":"https://openalex.org/C201797286","wikidata":"https://www.wikidata.org/wiki/Q4914986","display_name":"Biological data","level":2,"score":0.5447015762329102},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5379645824432373},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.511237621307373},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49747827649116516},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.4871313273906708},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4832090139389038},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4746582806110382},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.432176411151886},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3648308515548706},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34796279668807983},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23276662826538086},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.17349925637245178},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.1651972532272339},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.15431052446365356},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.08829337358474731},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5220/0004806201960203","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0004806201960203","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Bioinformatics Models, Methods and Algorithms","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.5220/0004806201960203","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0004806201960203","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Bioinformatics Models, Methods and Algorithms","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2353818951","https://openalex.org/W2090763504","https://openalex.org/W1605879311","https://openalex.org/W2611980620","https://openalex.org/W2385763735","https://openalex.org/W148178222","https://openalex.org/W2386394344","https://openalex.org/W2382806131","https://openalex.org/W2006683254","https://openalex.org/W3029024976"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,7,68,79,124,179],"biological":[3,23],"sciences":[4],"have":[5],"resulted":[6],"the":[8,36,72,83,96,106,122,125,171,192],"availability":[9],"of":[10,13,22,44,48,74,82,108,114,156],"large":[11],"amounts":[12],"sequence":[14,24],"data\r\n\r\n(both":[15],"DNA":[16,161,207],"and":[17,165,208],"protein":[18,166,209],"sequences).":[19],"The":[20],"annotation":[21],"data":[25,38],"can":[26,91,198],"be":[27,92],"approached":[28],"using":[29,59,137],"machine\r\n\r\nlearning":[30],"techniques.":[31],"Such":[32],"techniques":[33,90],"require":[34],"that":[35,151,170,191],"input":[37],"is":[39,55,77],"represented":[40],"as":[41],"a":[42,52,60,153],"vector":[43],"features.":[45],"In\r\n\r\nthe":[46],"absence":[47],"biologically":[49],"known":[50],"features,":[51,98],"common":[53],"approach":[54,139,146,194],"to":[56,94,134,195,200],"generate":[57,135],"k-mers":[58,84,117,150],"sliding":[61],"window.\r\n\r\nA":[62],"larger":[63],"k":[64],"value":[65],"usually":[66],"results":[67,158],"better":[69,121,183],"features;":[70],"however,":[71],"number":[73],"k-mer":[75],"features":[76,136,173,205],"exponential":[78],"k,\r\n\r\nand":[80],"many":[81],"are":[85,100,182],"not":[86],"informative.":[87],"Feature":[88],"selection":[89],"used":[93,104],"identify":[95],"most\r\n\r\ninformative":[97],"but":[99],"computationally":[101],"expensive":[102],"when":[103],"over":[105],"set":[107],"all":[109,130],"k-mers,":[110,131],"especially":[111],"over\r\n\r\nthe":[112],"space":[113],"variable":[115,148,203],"length":[116,149,204],"(which":[118],"presumably":[119],"capture":[120],"information":[123],"data).":[126],"Instead\r\n\r\nof":[127],"working":[128],"with":[129,175],"we":[132],"propose":[133],"an":[138],"based":[140],"on":[141,159],"Burrows":[142],"Wheeler\r\n\r\nTransformation":[143],"(BWT).":[144],"Our":[145],"generates":[147],"represent":[152],"small":[154],"subset":[155],"kmers.\r\n\r\nExperimental":[157],"both":[160],"(alternative":[162],"splicing":[163],"prediction)":[164],"(protein":[167],"localization)\r\n\r\nsequences":[168],"show":[169],"BWT":[172],"combined":[174],"feature":[176,196],"selection,":[177],"result":[178],"models":[180],"which":[181],"than\r\n\r\nmodels":[184],"learned":[185],"directly":[186],"from":[187],"k-mers.":[188],"This":[189],"shows":[190],"BWT-based":[193],"generation":[197],"be\r\n\r\nused":[199],"obtain":[201],"informative":[202],"for":[206],"prediction":[210],"problems.":[211]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
