{"id":"https://openalex.org/W2131479294","doi":"https://doi.org/10.1093/bioinformatics/btm479","title":"Improved BLAST searches using longer words for protein seeding","display_name":"Improved BLAST searches using longer words for protein seeding","publication_year":2007,"publication_date":"2007-10-06","ids":{"openalex":"https://openalex.org/W2131479294","doi":"https://doi.org/10.1093/bioinformatics/btm479","mag":"2131479294","pmid":"https://pubmed.ncbi.nlm.nih.gov/17921491"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btm479","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btm479","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/23/21/2949/49822796/bioinformatics_23_21_2949.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://academic.oup.com/bioinformatics/article-pdf/23/21/2949/49822796/bioinformatics_23_21_2949.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072196261","display_name":"Sergey Shiryev","orcid":"https://orcid.org/0009-0007-2799-0111"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]},{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sergey A. Shiryev","raw_affiliation_strings":["Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health","Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health, Bethesda, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]},{"raw_affiliation_string":"Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health, Bethesda, MD, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021448822","display_name":"Jason S. Papadopoulos","orcid":null},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]},{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason S. Papadopoulos","raw_affiliation_strings":["Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health"],"affiliations":[{"raw_affiliation_string":"Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000317216","display_name":"Alejandro A. Sch\u00e4ffer","orcid":"https://orcid.org/0000-0002-2147-8033"},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alejandro A. Sch\u00e4ffer","raw_affiliation_strings":["Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health"],"affiliations":[{"raw_affiliation_string":"Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032663560","display_name":"Richa Agarwala","orcid":"https://orcid.org/0000-0002-5518-9723"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]},{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richa Agarwala","raw_affiliation_strings":["Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health"],"affiliations":[{"raw_affiliation_string":"Department of Health and Human Services, National Center for Biotechnology Information, National Institutes of Health","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5072196261"],"corresponding_institution_ids":["https://openalex.org/I1299303238","https://openalex.org/I4210109390"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":{"value":3618,"currency":"USD","value_usd":3618},"fwci":0.5378,"has_fulltext":true,"cited_by_count":125,"citation_normalized_percentile":{"value":0.66529276,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"23","issue":"21","first_page":"2949","last_page":"2951"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.804696261882782},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7485977411270142},{"id":"https://openalex.org/keywords/file-transfer-protocol","display_name":"File Transfer Protocol","score":0.5422208905220032},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5173396468162537},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.502739429473877},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.502143383026123},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.41512736678123474},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35545629262924194},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3358456790447235},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3255467414855957},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2284412980079651},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12822294235229492},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10801872611045837},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09862753748893738},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.07793182134628296}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.804696261882782},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7485977411270142},{"id":"https://openalex.org/C169485995","wikidata":"https://www.wikidata.org/wiki/Q42283","display_name":"File Transfer Protocol","level":3,"score":0.5422208905220032},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5173396468162537},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.502739429473877},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.502143383026123},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.41512736678123474},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35545629262924194},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3358456790447235},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3255467414855957},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2284412980079651},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12822294235229492},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10801872611045837},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09862753748893738},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07793182134628296},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003196","descriptor_name":"Computer Graphics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003196","descriptor_name":"Computer Graphics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003196","descriptor_name":"Computer Graphics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D014584","descriptor_name":"User-Computer Interface","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014584","descriptor_name":"User-Computer Interface","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014584","descriptor_name":"User-Computer Interface","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1093/bioinformatics/btm479","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btm479","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/23/21/2949/49822796/bioinformatics_23_21_2949.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:17921491","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/17921491","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.106.299","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.106.299","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://bioinformatics.oxfordjournals.org/cgi/reprint/btm479v1.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1093/bioinformatics/btm479","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btm479","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/23/21/2949/49822796/bioinformatics_23_21_2949.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.4399999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2131479294.pdf","grobid_xml":"https://content.openalex.org/works/W2131479294.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1973578915","https://openalex.org/W1988670311","https://openalex.org/W2006290495","https://openalex.org/W2046892081","https://openalex.org/W2055043387","https://openalex.org/W2057783089","https://openalex.org/W2084787613","https://openalex.org/W2116744776","https://openalex.org/W2133312664","https://openalex.org/W2133657429","https://openalex.org/W2143210482","https://openalex.org/W2158714788"],"related_works":["https://openalex.org/W2349516242","https://openalex.org/W2378353542","https://openalex.org/W2358003099","https://openalex.org/W2373243130","https://openalex.org/W2479761186","https://openalex.org/W3088189910","https://openalex.org/W2389928460","https://openalex.org/W2351853150","https://openalex.org/W2386192326","https://openalex.org/W2167962184"],"abstract_inverted_index":{"The":[0],"option":[1],"to":[2,27],"use":[3],"long":[4],"words":[5],"is":[6,32],"in":[7],"the":[8,29],"NCBI":[9],"C":[10],"and":[11],"C++":[12],"toolkit":[13],"code":[14],"for":[15],"BLAST,":[16],"starting":[17],"with":[18],"version":[19],"2.2.16":[20],"of":[21],"blastall.":[22],"A":[23],"Linux":[24],"executable":[25],"used":[26],"produce":[28],"results":[30],"herein":[31],"available":[33],"at:":[34],"ftp://ftp.ncbi.nlm.nih.gov/pub/agarwala/protein_longwords":[35]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
