{"id":"https://openalex.org/W2074202411","doi":"https://doi.org/10.1142/s0219720012500163","title":"SUITE OF TOOLS FOR STATISTICAL N-GRAM LANGUAGE MODELING FOR PATTERN MINING IN WHOLE GENOME SEQUENCES","display_name":"SUITE OF TOOLS FOR STATISTICAL N-GRAM LANGUAGE MODELING FOR PATTERN MINING IN WHOLE GENOME SEQUENCES","publication_year":2012,"publication_date":"2012-06-15","ids":{"openalex":"https://openalex.org/W2074202411","doi":"https://doi.org/10.1142/s0219720012500163","mag":"2074202411","pmid":"https://pubmed.ncbi.nlm.nih.gov/22817111"},"language":"en","primary_location":{"id":"doi:10.1142/s0219720012500163","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720012500163","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000049528","display_name":"Madhavi K. Ganapathiraju","orcid":"https://orcid.org/0000-0002-3825-0924"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"MADHAVI K. GANAPATHIRAJU","raw_affiliation_strings":["Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040083108","display_name":"Asia Mitchell","orcid":"https://orcid.org/0000-0003-0969-1680"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"ASIA D. MITCHELL","raw_affiliation_strings":["Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111555897","display_name":"Thahir Mohamed","orcid":null},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"MOHAMED THAHIR","raw_affiliation_strings":["Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","Intelligent Systems Program, University of Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Intelligent Systems Program, University of Pittsburgh, USA","institution_ids":["https://openalex.org/I4210105785","https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003175276","display_name":"Kamiya Motwani","orcid":"https://orcid.org/0009-0004-1030-3299"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"KAMIYA MOTWANI","raw_affiliation_strings":["Supercomputer Education and Research Centre, Indian Institute of Science, Bangalore 560012, India","Supercomputer Education and Research Centre, Indian Institute of Science, Bangalore 560012, India#TAB#"],"affiliations":[{"raw_affiliation_string":"Supercomputer Education and Research Centre, Indian Institute of Science, Bangalore 560012, India","institution_ids":["https://openalex.org/I59270414"]},{"raw_affiliation_string":"Supercomputer Education and Research Centre, Indian Institute of Science, Bangalore 560012, India#TAB#","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037049960","display_name":"Seshan Ananthasubramanian","orcid":null},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I4210105785","display_name":"Intelligent Systems Research (United States)","ror":"https://ror.org/01reevc91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210105785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"SESHAN ANANTHASUBRAMANIAN","raw_affiliation_strings":["Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","Intelligent Systems Program, University of Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Pittsburgh, 5607 Baum Boulevard, Suite BAUM 423, Pittsburgh, PA 15206-3701, USA","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Intelligent Systems Program, University of Pittsburgh, USA","institution_ids":["https://openalex.org/I4210105785","https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5000049528"],"corresponding_institution_ids":["https://openalex.org/I170201317"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.1021936,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"10","issue":"06","first_page":"1250016","last_page":"1250016"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.6779049634933472},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6746910810470581},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.6409696936607361},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6391527056694031},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.5891633629798889},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5692996978759766},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5674450993537903},{"id":"https://openalex.org/keywords/whole-genome-sequencing","display_name":"Whole genome sequencing","score":0.4950844347476959},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.47353312373161316},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45089957118034363},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.432799756526947},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3459726870059967},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30096864700317383},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.1764468252658844}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.6779049634933472},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6746910810470581},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.6409696936607361},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6391527056694031},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.5891633629798889},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5692996978759766},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5674450993537903},{"id":"https://openalex.org/C24432333","wikidata":"https://www.wikidata.org/wiki/Q2068526","display_name":"Whole genome sequencing","level":4,"score":0.4950844347476959},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.47353312373161316},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45089957118034363},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.432799756526947},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3459726870059967},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30096864700317383},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.1764468252658844},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1142/s0219720012500163","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720012500163","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},{"id":"pmid:22817111","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/22817111","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of bioinformatics and computational biology","raw_type":null},{"id":"pmh:oai:eprints.iisc.ac.in:45363","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196309","display_name":"NOT FOUND REPOSITORY (Indian Institute of Science Bangalore)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59270414","host_organization_name":"Indian Institute of Science Bangalore","host_organization_lineage":["https://openalex.org/I59270414"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6299999952316284,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W348019042","https://openalex.org/W938539187","https://openalex.org/W1518399713","https://openalex.org/W1560120988","https://openalex.org/W1592870802","https://openalex.org/W1600139443","https://openalex.org/W1710724155","https://openalex.org/W1948467576","https://openalex.org/W1965941177","https://openalex.org/W1972017488","https://openalex.org/W1977358869","https://openalex.org/W1978937741","https://openalex.org/W1990061958","https://openalex.org/W1990412107","https://openalex.org/W1991022130","https://openalex.org/W1996423252","https://openalex.org/W1996640618","https://openalex.org/W2013659410","https://openalex.org/W2030962049","https://openalex.org/W2044462400","https://openalex.org/W2045761051","https://openalex.org/W2048241494","https://openalex.org/W2049083740","https://openalex.org/W2064701032","https://openalex.org/W2065504376","https://openalex.org/W2069891540","https://openalex.org/W2070376921","https://openalex.org/W2081288241","https://openalex.org/W2084748297","https://openalex.org/W2087658093","https://openalex.org/W2092067718","https://openalex.org/W2111771767","https://openalex.org/W2112814753","https://openalex.org/W2117502874","https://openalex.org/W2118703123","https://openalex.org/W2121925697","https://openalex.org/W2125270610","https://openalex.org/W2134114724","https://openalex.org/W2136145671","https://openalex.org/W2142619120","https://openalex.org/W2148753327","https://openalex.org/W2149588035","https://openalex.org/W2149915514","https://openalex.org/W2155210956","https://openalex.org/W2162948153","https://openalex.org/W2168909179","https://openalex.org/W2169317607","https://openalex.org/W4238014149"],"related_works":["https://openalex.org/W2252095989","https://openalex.org/W4322096525","https://openalex.org/W2551914602","https://openalex.org/W4281893144","https://openalex.org/W2084531783","https://openalex.org/W2787311093","https://openalex.org/W2132221452","https://openalex.org/W2105076537","https://openalex.org/W1577263324","https://openalex.org/W2169518243"],"abstract_inverted_index":{"Genome":[0],"sequences":[1,12],"contain":[2],"a":[3,17],"number":[4],"of":[5,13,20,22,63],"patterns":[6],"that":[7],"have":[8],"biomedical":[9],"significance.":[10],"Repetitive":[11],"various":[14],"kinds":[15],"are":[16],"primary":[18],"component":[19],"most":[21],"the":[23,29,50,61],"genomic":[24],"sequence":[25,53],"patterns.":[26,58],"We":[27,59],"extended":[28],"suffix-array":[30],"based":[31,45],"Biological":[32],"Language":[33],"Modeling":[34],"Toolkit":[35],"to":[36,54],"compute":[37],"n-gram":[38,43],"frequencies":[39],"as":[40,42],"well":[41],"language-model":[44],"perplexity":[46],"in":[47],"windows":[48],"over":[49],"whole":[51,71],"genome":[52,73],"find":[55],"biologically":[56],"relevant":[57],"present":[60],"suite":[62],"tools":[64],"and":[65],"their":[66],"application":[67],"for":[68],"analysis":[69],"on":[70],"human":[72],"sequence.":[74]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
