{"id":"https://openalex.org/W1987510927","doi":"https://doi.org/10.1109/bibm.2014.6999201","title":"MRSMRS: Mining repetitive sequences in a MapReduce setting","display_name":"MRSMRS: Mining repetitive sequences in a MapReduce setting","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W1987510927","doi":"https://doi.org/10.1109/bibm.2014.6999201","mag":"1987510927"},"language":"en","primary_location":{"id":"doi:10.1109/bibm.2014.6999201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108505505","display_name":"Hongfei Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongfei Cao","raw_affiliation_strings":["Department of Computer Science, University of Missouri, Columbia, MO, USA","Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113781818","display_name":"Michael Phinney","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Phinney","raw_affiliation_strings":["Department of Computer Science, University of Missouri, Columbia, MO, USA","Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026228186","display_name":"Devin Petersohn","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Devin Petersohn","raw_affiliation_strings":["Department of Computer Science, University of Missouri, Columbia, MO, USA","Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065566965","display_name":"Benjamin Ryan Merideth","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin Merideth","raw_affiliation_strings":["Informatics Institute, University of Missouri, Columbia, MO, USA","Informatics Institute, University of Missouri, Columbia, MO 65211, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Informatics Institute, University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"Informatics Institute, University of Missouri, Columbia, MO 65211, USA#TAB#","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013031415","display_name":"Chi\u2010Ren Shyu","orcid":"https://orcid.org/0000-0001-9197-9522"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chi-Ren Shyu","raw_affiliation_strings":["Electrical and Computer Engineering, University of Missouri, Columbia, MO, USA","Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"Department of Computer Science, University of Missouri, Columbia MO 65211, USA#TAB#","institution_ids":["https://openalex.org/I76835614"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108505505"],"corresponding_institution_ids":["https://openalex.org/I76835614"],"apc_list":null,"apc_paid":null,"fwci":0.2789,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.57750408,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"6","issue":null,"first_page":"463","last_page":"470"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10497","display_name":"Fungal and yeast genetics research","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.7123103737831116},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6635146141052246},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5766080617904663},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.47542837262153625},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.4606151580810547},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4407252371311188},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4363309442996979},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.41081929206848145},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3543824553489685},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.348267138004303},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.21451687812805176},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16921862959861755},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.13413110375404358}],"concepts":[{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.7123103737831116},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6635146141052246},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5766080617904663},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.47542837262153625},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4606151580810547},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4407252371311188},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4363309442996979},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.41081929206848145},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3543824553489685},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.348267138004303},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.21451687812805176},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16921862959861755},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.13413110375404358},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm.2014.6999201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306106","display_name":"U.S. Department of Education","ror":"https://ror.org/021adze67"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1497737511","https://openalex.org/W1805167282","https://openalex.org/W1966021055","https://openalex.org/W1970814720","https://openalex.org/W1979402262","https://openalex.org/W1981374009","https://openalex.org/W2015159527","https://openalex.org/W2096128575","https://openalex.org/W2096634299","https://openalex.org/W2100990361","https://openalex.org/W2108828818","https://openalex.org/W2112814753","https://openalex.org/W2116628377","https://openalex.org/W2122954888","https://openalex.org/W2123546399","https://openalex.org/W2131581981","https://openalex.org/W2158714788","https://openalex.org/W2163630989","https://openalex.org/W2173213060","https://openalex.org/W2624304035","https://openalex.org/W6738874634"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4390608645","https://openalex.org/W4321353415","https://openalex.org/W4247566972","https://openalex.org/W2745001401","https://openalex.org/W4394895745","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497"],"abstract_inverted_index":{"Recent":[0],"research":[1],"suggests":[2],"DNA":[3],"repeats":[4,87],"play":[5],"critical":[6],"roles":[7],"in":[8,36,72,186],"cellular":[9],"regulatory":[10],"functions":[11],"and":[12,59,114,120,209,264],"disease":[13],"development.":[14],"Also,":[15],"repeat":[16],"variability":[17],"among":[18,39],"different":[19,40],"species,":[20,24],"or":[21],"the":[22,30,73,95,157,213],"same":[23],"is":[25],"an":[26,242],"important":[27],"indicator":[28],"for":[29],"development":[31],"of":[32,63,75,77,80,91,97,107,127,134,152,166,215,219,225,237,245],"specific":[33],"phenotypes.":[34],"Similarities":[35],"repetitive":[37,220],"sequences":[38,221],"species":[41],"have":[42,61],"been":[43,62],"shown":[44],"to":[45,70,141,178,190,198,212,241],"indicate":[46],"deeply":[47],"conserved":[48,54],"functions.":[49],"Patterns":[50],"such":[51],"as":[52],"ultra":[53],"elements":[55],"(UCEs),":[56],"tandem":[57],"repeats,":[58],"palindromes":[60],"interest.":[64],"Researchers":[65],"utilize":[66,156],"various":[67,217],"computational":[68],"approaches":[69,122,263],"aid":[71],"identification":[74],"each":[76,171],"these":[78],"types":[79,218],"patterns.":[81],"The":[82,102],"challenge":[83,214],"associated":[84],"with":[85],"identifying":[86,216],"across":[88,222],"a":[89,131,164,173,205,223,235,256],"collection":[90,133,224,236],"genomes":[92,146],"arises":[93],"from":[94],"amount":[96],"data":[98,116,137,140,151,183,195],"stored":[99],"within":[100],"DNA.":[101],"human":[103],"genome":[104],"alone":[105],"consists":[106],"more":[108],"than":[109],"3.1":[110],"billion":[111,247],"base":[112,248],"pairs,":[113],"intermediate":[115,182],"generated":[117],"by":[118],"alignment-":[119],"hash-based":[121],"are":[123,147,184,253],"substantial.":[124],"This":[125],"sort":[126],"all-against-all":[128],"analysis":[129],"on":[130,163],"large":[132],"genomic":[135,194],"sequence":[136],"often":[138],"requires":[139],"be":[142],"reprocessed":[143],"when":[144],"new":[145,193],"collected.":[148],"To":[149],"handle":[150],"this":[153,228],"scale,":[154],"we":[155,230],"Hadoop":[158],"Distributed":[159],"File":[160],"System":[161],"running":[162],"cluster":[165],"11":[167],"relatively":[168],"inexpensive":[169],"nodes,":[170],"containing":[172],"quad-core":[174],"commodity":[175],"processor.":[176],"Furthermore,":[177],"alleviate":[179],"redundant":[180],"computation,":[181],"organized":[185],"HBase,":[187],"allowing":[188],"us":[189],"incrementally":[191],"process":[192],"without":[196],"having":[197],"reprocess":[199],"existing":[200],"genomes.":[201,226],"Our":[202],"approach":[203],"lends":[204],"cost-effective,":[206],"flexible,":[207],"robust,":[208],"scalable":[210],"solution":[211],"In":[227],"study,":[229],"benchmark":[231],"our":[232],"method":[233],"using":[234],"6":[238],"genomes,":[239],"summing":[240],"approximate":[243],"total":[244],"14.2":[246],"pairs.":[249],"Three":[250],"case":[251],"studies":[252],"presented,":[254],"demonstrating":[255],"10.4":[257],"times":[258],"speedup":[259],"over":[260],"previous":[261],"state-of-the-art":[262],"linear":[265],"scalability.":[266]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
