{"id":"https://openalex.org/W1989435384","doi":"https://doi.org/10.1016/j.procs.2012.04.219","title":"Improved Motif Detection in Large Sequence Sets with Random Sampling in a Kepler workflow","display_name":"Improved Motif Detection in Large Sequence Sets with Random Sampling in a Kepler workflow","publication_year":2012,"publication_date":"2012-01-01","ids":{"openalex":"https://openalex.org/W1989435384","doi":"https://doi.org/10.1016/j.procs.2012.04.219","mag":"1989435384"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2012.04.219","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2012.04.219","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2012.04.219","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101634110","display_name":"Sven K\u00f6hler","orcid":"https://orcid.org/0000-0002-6068-135X"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sven K\u00f6hler","raw_affiliation_strings":["University of California, Davis","university of california, davis"],"affiliations":[{"raw_affiliation_string":"University of California, Davis","institution_ids":["https://openalex.org/I84218800"]},{"raw_affiliation_string":"university of california, davis","institution_ids":["https://openalex.org/I84218800"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000201082","display_name":"Phillip Seitzer","orcid":"https://orcid.org/0000-0002-7379-8960"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Phillip Seitzer","raw_affiliation_strings":["University of California, Davis","university of california, davis"],"affiliations":[{"raw_affiliation_string":"University of California, Davis","institution_ids":["https://openalex.org/I84218800"]},{"raw_affiliation_string":"university of california, davis","institution_ids":["https://openalex.org/I84218800"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028322147","display_name":"Marc T. Facciotti","orcid":"https://orcid.org/0000-0003-4453-3274"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc T. Facciotti","raw_affiliation_strings":["University of California, Davis","university of california, davis"],"affiliations":[{"raw_affiliation_string":"University of California, Davis","institution_ids":["https://openalex.org/I84218800"]},{"raw_affiliation_string":"university of california, davis","institution_ids":["https://openalex.org/I84218800"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057600294","display_name":"Bertram Lud\u00e4scher","orcid":"https://orcid.org/0000-0001-9140-936X"},"institutions":[{"id":"https://openalex.org/I84218800","display_name":"University of California, Davis","ror":"https://ror.org/05rrcem69","country_code":"US","type":"education","lineage":["https://openalex.org/I84218800"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bertram Lud\u00e4scher","raw_affiliation_strings":["University of California, Davis","university of california, davis"],"affiliations":[{"raw_affiliation_string":"University of California, Davis","institution_ids":["https://openalex.org/I84218800"]},{"raw_affiliation_string":"university of california, davis","institution_ids":["https://openalex.org/I84218800"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101634110"],"corresponding_institution_ids":["https://openalex.org/I84218800"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.15633458,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"9","issue":null,"first_page":"1999","last_page":"1999"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7594289779663086},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.701891303062439},{"id":"https://openalex.org/keywords/motif","display_name":"Motif (music)","score":0.696938157081604},{"id":"https://openalex.org/keywords/sequence-motif","display_name":"Sequence motif","score":0.6795033812522888},{"id":"https://openalex.org/keywords/subsequence","display_name":"Subsequence","score":0.5693799257278442},{"id":"https://openalex.org/keywords/kepler","display_name":"Kepler","score":0.5360029339790344},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5152044892311096},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.5148093104362488},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4233623445034027},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3434862494468689},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3332493305206299},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16425928473472595}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7594289779663086},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.701891303062439},{"id":"https://openalex.org/C32276052","wikidata":"https://www.wikidata.org/wiki/Q908349","display_name":"Motif (music)","level":2,"score":0.696938157081604},{"id":"https://openalex.org/C117745874","wikidata":"https://www.wikidata.org/wiki/Q901612","display_name":"Sequence motif","level":3,"score":0.6795033812522888},{"id":"https://openalex.org/C137877099","wikidata":"https://www.wikidata.org/wiki/Q1332977","display_name":"Subsequence","level":3,"score":0.5693799257278442},{"id":"https://openalex.org/C207963374","wikidata":"https://www.wikidata.org/wiki/Q47592","display_name":"Kepler","level":3,"score":0.5360029339790344},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5152044892311096},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.5148093104362488},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4233623445034027},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3434862494468689},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3332493305206299},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16425928473472595},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C150846664","wikidata":"https://www.wikidata.org/wiki/Q7602306","display_name":"Stars","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2012.04.219","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2012.04.219","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2012.04.219","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2012.04.219","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W2037712729","https://openalex.org/W2130595749"],"related_works":["https://openalex.org/W1583922594","https://openalex.org/W2998448420","https://openalex.org/W4205474360","https://openalex.org/W2073661767","https://openalex.org/W2380185347","https://openalex.org/W2370034659","https://openalex.org/W2159636084","https://openalex.org/W2117168988","https://openalex.org/W2103777723","https://openalex.org/W2610941444"],"abstract_inverted_index":{"The":[0],"discovery":[1,43],"of":[2,14,44,61,75,81,88,131,139,218,237,255],"functionally":[3,114],"significant":[4,102,115],"short,":[5],"statistically":[6],"overrepresented":[7],"subsequence":[8],"patterns":[9],"(motifs)":[10],"in":[11,27,99,164,241,258],"a":[12,18,31,49,85,121,174,190,199],"set":[13,29,161],"biological":[15],"sequences":[16,26,39,76],"is":[17,48,142,151,162],"challenging":[19],"and":[20,101,117,157,247],"important":[21],"problem.":[22],"Oftentimes,":[23],"not":[24,134,152],"all":[25,97],"the":[28,41,59,128,145,148,158,178,196,216,219,234,238,253],"contain":[30],"motif.":[32],"When":[33],"using":[34,177],"traditional":[35],"methods":[36],"these":[37,82],"non-motif-containing":[38],"complicate":[40],"algorithmic":[42],"motifs.":[45],"MotifCatcher":[46,132],"[1]":[47],"framework":[50,111],"developed":[51],"by":[52,65],"Seitzer":[53],"et":[54],"al.":[55],"[2]":[56],"that":[57,147,183],"extends":[58],"sensitivity":[60],"existing":[62],"motiffinding":[63],"tools":[64,93],"applying":[66],"random":[67],"sampling":[68],"to":[69,96,136,144,168,194,204,232,251],"input":[70],"sequences.":[71,90,140],"First,":[72],"multiple":[73],"subsets":[74,83,98,256],"are":[77,94,104],"randomly":[78],"constructed.":[79],"Some":[80],"have":[84],"large":[86,137],"number":[87,254],"motif-containing":[89],"Traditional":[91],"motif-finding":[92],"applied":[95],"parallel,":[100],"motifs":[103,116,243],"recovered":[105],"from":[106],"appropriate":[107],"subsets.":[108],"Finally,":[109],"this":[110],"returns":[112],"candidate":[113],"organizes":[118],"them":[119],"into":[120,198],"tree.":[122],"which":[123,242],"allows":[124],"further":[125],"analysis.":[126],"However,":[127],"current":[129,149],"implementation":[130,150,176],"does":[133],"scale":[135],"sets":[138,240],"This":[141],"due":[143],"fact":[146],"suited":[153],"for":[154,208],"distributed":[155],"computing,":[156],"whole":[159],"sequence":[160,239],"kept":[163],"memory.":[165],"In":[166],"order":[167],"achieve":[169],"better":[170],"scalability":[171],"we":[172,229,248],"present":[173],"redesigned":[175],"Kepler":[179,187,213,228],"scientific":[180],"workflow":[181],"system":[182],"addresses":[184],"those":[185],"shortcomings.":[186],"functions":[188],"as":[189],"convenient":[191],"front":[192],"end":[193],"encode":[195],"computation":[197],"Map":[200],"Reduce":[201],"pattern":[202],"[3]":[203],"archive":[205],"high":[206],"parallelism":[207],"computationally":[209],"intensive":[210],"steps.":[211],"Furthermore,":[212],"greatly":[214],"simplifies":[215],"substitution":[217],"motif":[220],"finding":[221],"algorithm":[222],"used":[223],"on":[224],"each":[225],"subset.":[226],"Using":[227],"ware":[230],"able":[231,250],"increase":[233,252],"maximum":[235],"size":[236],"can":[244],"be":[245],"discovered":[246],"were":[249],"processed":[257],"parallel.":[259]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
