{"id":"https://openalex.org/W2144268209","doi":"https://doi.org/10.1093/bioinformatics/bti1018","title":"De novo identification of repeat families in large genomes","display_name":"De novo identification of repeat families in large genomes","publication_year":2005,"publication_date":"2005-06-01","ids":{"openalex":"https://openalex.org/W2144268209","doi":"https://doi.org/10.1093/bioinformatics/bti1018","mag":"2144268209","pmid":"https://pubmed.ncbi.nlm.nih.gov/15961478"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/bti1018","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bti1018","pdf_url":null,"source":{"id":"https://openalex.org/S4210188263","display_name":"Computer applications in the biosciences","issn_l":"0266-7061","issn":["0266-7061","1460-2059"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066797582","display_name":"Alkes L. Price","orcid":"https://orcid.org/0000-0002-2971-7975"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"A. L. Price","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101920266","display_name":"N C Jones","orcid":"https://orcid.org/0000-0002-3300-0571"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"N. C. Jones","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027480961","display_name":"Pavel A. Pevzner","orcid":"https://orcid.org/0000-0002-0418-165X"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"P. A. Pevzner","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California San Diego La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California \u2014 San Diego, La Jolla, CA 92093-0114, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5066797582"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":6.4468,"has_fulltext":false,"cited_by_count":2321,"citation_normalized_percentile":{"value":0.9753872,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"21","issue":"Suppl 1","first_page":"i351","last_page":"i358"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.578499972820282,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.578499972820282,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.08990000188350677,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10949","display_name":"Genetic Neurodegenerative Diseases","score":0.08500000089406967,"subfield":{"id":"https://openalex.org/subfields/2804","display_name":"Cellular and Molecular Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.8034858703613281},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.6105790734291077},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5921827554702759},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.4812578856945038},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.4491349756717682},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4003477096557617},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.11230278015136719}],"concepts":[{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.8034858703613281},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.6105790734291077},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5921827554702759},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.4812578856945038},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4491349756717682},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4003477096557617},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.11230278015136719},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D002107","descriptor_name":"Caenorhabditis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D002107","descriptor_name":"Caenorhabditis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D002107","descriptor_name":"Caenorhabditis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012091","descriptor_name":"Repetitive Sequences, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012091","descriptor_name":"Repetitive Sequences, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012091","descriptor_name":"Repetitive Sequences, Nucleic Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051379","descriptor_name":"Mice","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051379","descriptor_name":"Mice","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051379","descriptor_name":"Mice","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051381","descriptor_name":"Rats","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051381","descriptor_name":"Rats","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D051381","descriptor_name":"Rats","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/bti1018","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bti1018","pdf_url":null,"source":{"id":"https://openalex.org/S4210188263","display_name":"Computer applications in the biosciences","issn_l":"0266-7061","issn":["0266-7061","1460-2059"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:15961478","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/15961478","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W162171762","https://openalex.org/W1578516052","https://openalex.org/W1597644352","https://openalex.org/W1966585515","https://openalex.org/W1971926406","https://openalex.org/W1973009845","https://openalex.org/W2050557699","https://openalex.org/W2051916497","https://openalex.org/W2055043387","https://openalex.org/W2071398217","https://openalex.org/W2085239230","https://openalex.org/W2087064593","https://openalex.org/W2090415636","https://openalex.org/W2097040991","https://openalex.org/W2099809434","https://openalex.org/W2112814753","https://openalex.org/W2116628377","https://openalex.org/W2122954888","https://openalex.org/W2136238707","https://openalex.org/W2136651963","https://openalex.org/W2151899848","https://openalex.org/W2161248563","https://openalex.org/W2167673155"],"related_works":["https://openalex.org/W2082860237","https://openalex.org/W2119695867","https://openalex.org/W2130076355","https://openalex.org/W1990804418","https://openalex.org/W1993764875","https://openalex.org/W2788277189","https://openalex.org/W2013243191","https://openalex.org/W2151865869","https://openalex.org/W2117258802","https://openalex.org/W4234157524"],"abstract_inverted_index":{"MOTIVATION:":[0],"De":[1],"novo":[2,46,91],"repeat":[3,31,49,63,69,92],"family":[4,93],"identification":[5,47,94],"is":[6,24,75,79,126],"a":[7,25,41,59,65],"challenging":[8],"algorithmic":[9],"problem":[10],"of":[11,18,48,53,62,81,106,112,118],"great":[12],"practical":[13],"importance.":[14],"As":[15],"the":[16,30,86,107],"number":[17],"genome":[19,109],"sequencing":[20],"projects":[21],"increases,":[22],"there":[23],"pressing":[26],"need":[27],"to":[28],"identify":[29],"families":[32,50],"present":[33],"in":[34,68,95],"large,":[35],"newly":[36,96],"sequenced":[37,97],"genomes.":[38,98],"We":[39],"develop":[40],"new":[42],"method":[43,57],"for":[44,89,128],"de":[45,90],"via":[51],"extension":[52],"consensus":[54],"seeds;":[55],"our":[56],"enables":[58],"rigorous":[60],"definition":[61],"boundaries,":[64],"key":[66],"issue":[67],"analysis.":[70],"RESULTS:":[71],"Our":[72],"RepeatScout":[73],"algorithm":[74],"more":[76],"sensitive":[77],"and":[78,110,114],"orders":[80],"magnitude":[82],"faster":[83],"than":[84],"RECON,":[85],"dominant":[87],"tool":[88],"Using":[99],"RepeatScout,":[100],"we":[101],"estimate":[102],"that":[103],"approximately":[104],"2%":[105],"human":[108],"4%":[111],"mouse":[113],"rat":[115],"genomes":[116],"consist":[117],"previously":[119],"unannotated":[120],"repetitive":[121],"sequence.":[122],"AVAILABILITY:":[123],"Source":[124],"code":[125],"available":[127],"download":[129],"at":[130],"http://www-cse.ucsd.edu/groups/bioinformatics/software.html":[131]},"counts_by_year":[{"year":2026,"cited_by_count":76},{"year":2025,"cited_by_count":223},{"year":2024,"cited_by_count":256},{"year":2023,"cited_by_count":219},{"year":2022,"cited_by_count":244},{"year":2021,"cited_by_count":303},{"year":2020,"cited_by_count":221},{"year":2019,"cited_by_count":122},{"year":2018,"cited_by_count":97},{"year":2017,"cited_by_count":73},{"year":2016,"cited_by_count":89},{"year":2015,"cited_by_count":81},{"year":2014,"cited_by_count":74},{"year":2013,"cited_by_count":56},{"year":2012,"cited_by_count":46}],"updated_date":"2026-06-23T13:55:30.953635","created_date":"2025-10-10T00:00:00"}
