{"id":"https://openalex.org/W2972605275","doi":"https://doi.org/10.1109/bibm47256.2019.8983072","title":"Unaligned Sequence Similarity Search Using Deep Learning","display_name":"Unaligned Sequence Similarity Search Using Deep Learning","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W2972605275","doi":"https://doi.org/10.1109/bibm47256.2019.8983072","mag":"2972605275"},"language":"en","primary_location":{"id":"doi:10.1109/bibm47256.2019.8983072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm47256.2019.8983072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.06929","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064199473","display_name":"James K. Senter","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"James K. Senter","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science","University Of Tennessee"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science","institution_ids":[]},{"raw_affiliation_string":"University Of Tennessee","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048569742","display_name":"Taylor M. Royalty","orcid":"https://orcid.org/0000-0003-4533-4787"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taylor M. Royalty","raw_affiliation_strings":["University of Tennessee - Knoxville,Departments of Microbiology and Earth and Planetary Sciences,Knoxville,TN","University Of Tennessee"],"affiliations":[{"raw_affiliation_string":"University of Tennessee - Knoxville,Departments of Microbiology and Earth and Planetary Sciences,Knoxville,TN","institution_ids":["https://openalex.org/I75027704","https://openalex.org/I2802706902"]},{"raw_affiliation_string":"University Of Tennessee","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040578598","display_name":"Andrew D. Steen","orcid":"https://orcid.org/0000-0003-4297-4332"},"institutions":[{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]},{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew D. Steen","raw_affiliation_strings":["University of Tennessee - Knoxville,Departments of Microbiology and Earth and Planetary Sciences,Knoxville,TN","University Of Tennessee"],"affiliations":[{"raw_affiliation_string":"University of Tennessee - Knoxville,Departments of Microbiology and Earth and Planetary Sciences,Knoxville,TN","institution_ids":["https://openalex.org/I75027704","https://openalex.org/I2802706902"]},{"raw_affiliation_string":"University Of Tennessee","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088544434","display_name":"Amir Sadovnik","orcid":"https://orcid.org/0000-0001-9011-7365"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amir Sadovnik","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science","University Of Tennessee"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science","institution_ids":[]},{"raw_affiliation_string":"University Of Tennessee","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064199473"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08407883,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1892","last_page":"1899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6110975742340088},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5721273422241211},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.5579038858413696},{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.5410699844360352},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5332216024398804},{"id":"https://openalex.org/keywords/euclidean-distance","display_name":"Euclidean distance","score":0.4556884467601776},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.454953134059906},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.44180870056152344},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.42094430327415466},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3828091025352478},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.368646502494812},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22042259573936462},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.14360836148262024}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6110975742340088},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5721273422241211},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.5579038858413696},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.5410699844360352},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5332216024398804},{"id":"https://openalex.org/C120174047","wikidata":"https://www.wikidata.org/wiki/Q847073","display_name":"Euclidean distance","level":2,"score":0.4556884467601776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.454953134059906},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.44180870056152344},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.42094430327415466},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3828091025352478},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.368646502494812},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22042259573936462},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.14360836148262024},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/bibm47256.2019.8983072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm47256.2019.8983072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.06929","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.06929","pdf_url":"https://arxiv.org/pdf/1909.06929","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2972605275","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1909.06929v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1909.06929","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.06929","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.06929","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.06929","pdf_url":"https://arxiv.org/pdf/1909.06929","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5584389321","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320307102","funder_display_name":"Intel Corporation"}],"funders":[{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2972605275.pdf","grobid_xml":"https://content.openalex.org/works/W2972605275.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W196214544","https://openalex.org/W1627400044","https://openalex.org/W1810943226","https://openalex.org/W1932434485","https://openalex.org/W1976325156","https://openalex.org/W1991133427","https://openalex.org/W2011025549","https://openalex.org/W2045204781","https://openalex.org/W2055043387","https://openalex.org/W2060797027","https://openalex.org/W2064675550","https://openalex.org/W2072234624","https://openalex.org/W2076305282","https://openalex.org/W2077563243","https://openalex.org/W2077574412","https://openalex.org/W2096041635","https://openalex.org/W2096093282","https://openalex.org/W2096733369","https://openalex.org/W2101234009","https://openalex.org/W2102281041","https://openalex.org/W2115595474","https://openalex.org/W2124351063","https://openalex.org/W2125838338","https://openalex.org/W2127175247","https://openalex.org/W2131774270","https://openalex.org/W2138122982","https://openalex.org/W2141356165","https://openalex.org/W2142678478","https://openalex.org/W2158714788","https://openalex.org/W2160053034","https://openalex.org/W2173732482","https://openalex.org/W2325939864","https://openalex.org/W2336509392","https://openalex.org/W2574212476","https://openalex.org/W2798209956","https://openalex.org/W2882018707","https://openalex.org/W2922146544","https://openalex.org/W2949346354","https://openalex.org/W2950577311","https://openalex.org/W2964039506","https://openalex.org/W2964771029","https://openalex.org/W3099206234","https://openalex.org/W4236236547","https://openalex.org/W6607974698","https://openalex.org/W6636759986","https://openalex.org/W6675354045","https://openalex.org/W6700903540","https://openalex.org/W6753113897"],"related_works":["https://openalex.org/W3004650560","https://openalex.org/W2765633183","https://openalex.org/W2515698080","https://openalex.org/W3029785921","https://openalex.org/W2563784674","https://openalex.org/W1583581687","https://openalex.org/W2963017196","https://openalex.org/W2903382985","https://openalex.org/W2614833841","https://openalex.org/W2899481872","https://openalex.org/W1548108731","https://openalex.org/W2187967446","https://openalex.org/W1508164074","https://openalex.org/W2963743179","https://openalex.org/W2510906293","https://openalex.org/W1588529487","https://openalex.org/W1519517812","https://openalex.org/W2477323061","https://openalex.org/W2523724266","https://openalex.org/W2155844767"],"abstract_inverted_index":{"Gene":[0],"annotation":[1],"has":[2],"traditionally":[3],"required":[4],"direct":[5],"comparison":[6,22,47,178],"of":[7,17,63,101,104,171,216],"DNA":[8,80],"sequences":[9,83,106,243],"between":[10],"an":[11],"unknown":[12],"gene":[13,34,147,173],"and":[14,60,107,167,195,238],"a":[15,33,38,61,71,85,145],"database":[16,53,137,158],"known":[18],"ones":[19,131],"using":[20,74],"string":[21,64],"methods.":[23,153],"However,":[24,154],"these":[25],"methods":[26,184],"do":[27,120],"not":[28,36,121,249],"provide":[29,142],"useful":[30,228],"information":[31,116],"when":[32,51,233],"does":[35],"have":[37,122,248],"close":[39],"match":[40],"in":[41,84,88,132,151],"the":[42,52,102,133,157,172,186,192,207,214],"database.":[43,134],"In":[44,66,175],"addition,":[45,176],"each":[46,177],"can":[48,141,202,226],"be":[49,203,227],"costly":[50],"is":[54,138,149,159,179,189],"large":[55],"since":[56,185],"it":[57,111,161],"requires":[58],"alignments":[59],"series":[62],"comparisons.":[65],"this":[67,140],"work":[68],"we":[69,221],"propose":[70],"novel":[72],"approach:":[73],"recurrent":[75],"neural":[76],"networks":[77],"to":[78,114,129,164,191,205,245],"embed":[79],"or":[81],"amino-acid":[82],"low-dimensional":[86],"space":[87,97],"which":[89,119,247],"distances":[90],"correlate":[91],"with":[92],"functional":[93],"similarity.":[94],"This":[95],"embedding":[96,225],"overcomes":[98],"both":[99,230],"shortcomings":[100],"method":[103],"aligning":[105],"comparing":[108],"homology.":[109],"First,":[110],"allows":[112,162],"us":[113,163],"obtain":[115],"about":[117],"genes":[118],"exact":[123],"matches":[124],"by":[125],"measuring":[126],"their":[127],"similarity":[128],"other":[130],"If":[135],"our":[136,217,224,234,242],"labeled":[139],"labels":[143,235],"for":[144,229],"query":[146],"as":[148],"done":[150],"traditional":[152,183],"even":[155],"if":[156],"unlabeled":[160],"find":[165,206],"clusters":[166],"infer":[168],"some":[169],"characteristics":[170],"population.":[174],"much":[180],"faster":[181],"than":[182],"distance":[187],"metric":[188],"reduced":[190],"Euclidean":[193],"distance,":[194],"thus":[196],"efficient":[197],"approximate":[198],"nearest":[199],"neighbor":[200],"algorithms":[201],"used":[204],"best":[208],"match.":[209],"We":[210],"present":[211],"results":[212],"showing":[213],"advantage":[215],"algorithm.":[218],"More":[219],"specifically":[220],"show":[222],"how":[223],"classification":[231],"tasks":[232,240],"are":[236],"known,":[237],"clustering":[239],"where":[241],"belong":[244],"classes":[246],"been":[250],"seen":[251],"before.":[252]},"counts_by_year":[],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
