{"id":"https://openalex.org/W2111071896","doi":"https://doi.org/10.14778/2535569.2448951","title":"Memory efficient minimum substring partitioning","display_name":"Memory efficient minimum substring partitioning","publication_year":2013,"publication_date":"2013-01-01","ids":{"openalex":"https://openalex.org/W2111071896","doi":"https://doi.org/10.14778/2535569.2448951","mag":"2111071896"},"language":"en","primary_location":{"id":"doi:10.14778/2535569.2448951","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2535569.2448951","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100421418","display_name":"Yang Li","orcid":"https://orcid.org/0000-0002-0736-251X"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yang Li","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046945057","display_name":"Pegah Kamousi","orcid":null},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pegah Kamousi","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013081925","display_name":"Fangqiu Han","orcid":"https://orcid.org/0009-0006-0309-7284"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fangqiu Han","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075362627","display_name":"Shengqi Yang","orcid":"https://orcid.org/0000-0002-0782-9116"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shengqi Yang","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047709762","display_name":"Xifeng Yan","orcid":"https://orcid.org/0009-0000-6508-4792"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xifeng Yan","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072397842","display_name":"Subhash Suri","orcid":"https://orcid.org/0000-0002-5668-7521"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subhash Suri","raw_affiliation_strings":["University of California, Santa Barbara","University of California,Santa Barbara,"],"affiliations":[{"raw_affiliation_string":"University of California, Santa Barbara","institution_ids":["https://openalex.org/I154570441"]},{"raw_affiliation_string":"University of California,Santa Barbara,","institution_ids":["https://openalex.org/I154570441"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100421418"],"corresponding_institution_ids":["https://openalex.org/I154570441"],"apc_list":null,"apc_paid":null,"fwci":1.7546,"has_fulltext":false,"cited_by_count":49,"citation_normalized_percentile":{"value":0.84659696,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"6","issue":"3","first_page":"169","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.9224129915237427},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7520842552185059},{"id":"https://openalex.org/keywords/de-bruijn-sequence","display_name":"De Bruijn sequence","score":0.7462632656097412},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6904311180114746},{"id":"https://openalex.org/keywords/de-bruijn-graph","display_name":"De Bruijn graph","score":0.6639915704727173},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.6169362664222717},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.550557553768158},{"id":"https://openalex.org/keywords/disjoint-sets","display_name":"Disjoint sets","score":0.48705652356147766},{"id":"https://openalex.org/keywords/sequence-assembly","display_name":"Sequence assembly","score":0.47884583473205566},{"id":"https://openalex.org/keywords/out-of-core-algorithm","display_name":"Out-of-core algorithm","score":0.464020311832428},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.44289132952690125},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.37172067165374756},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3640978932380676},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.2938477396965027},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.17964014410972595},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16490286588668823},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14507713913917542},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0959736704826355}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.9224129915237427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7520842552185059},{"id":"https://openalex.org/C170320093","wikidata":"https://www.wikidata.org/wiki/Q1953457","display_name":"De Bruijn sequence","level":2,"score":0.7462632656097412},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6904311180114746},{"id":"https://openalex.org/C20218877","wikidata":"https://www.wikidata.org/wiki/Q3066095","display_name":"De Bruijn graph","level":3,"score":0.6639915704727173},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.6169362664222717},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.550557553768158},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.48705652356147766},{"id":"https://openalex.org/C18949551","wikidata":"https://www.wikidata.org/wiki/Q740578","display_name":"Sequence assembly","level":5,"score":0.47884583473205566},{"id":"https://openalex.org/C79470037","wikidata":"https://www.wikidata.org/wiki/Q279748","display_name":"Out-of-core algorithm","level":2,"score":0.464020311832428},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.44289132952690125},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.37172067165374756},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3640978932380676},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2938477396965027},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.17964014410972595},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16490286588668823},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14507713913917542},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0959736704826355},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/2535569.2448951","is_oa":false,"landing_page_url":"https://doi.org/10.14778/2535569.2448951","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.41999998688697815}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320333422","display_name":"Materials Research Science and Engineering Center, Harvard University","ror":null},{"id":"https://openalex.org/F4320337965","display_name":"California NanoSystems Institute","ror":"https://ror.org/00q7fqf35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W73629738","https://openalex.org/W1635391495","https://openalex.org/W1966822396","https://openalex.org/W1972418517","https://openalex.org/W1972924519","https://openalex.org/W2020191321","https://openalex.org/W2053127227","https://openalex.org/W2080330461","https://openalex.org/W2103903744","https://openalex.org/W2108991785","https://openalex.org/W2133531097","https://openalex.org/W2136651963","https://openalex.org/W2141370774","https://openalex.org/W2142749416","https://openalex.org/W2144560237","https://openalex.org/W2156104322","https://openalex.org/W2159647614","https://openalex.org/W2160969485","https://openalex.org/W2161048357","https://openalex.org/W2161546116","https://openalex.org/W2164456230","https://openalex.org/W2752885492","https://openalex.org/W3111526966","https://openalex.org/W4251082071","https://openalex.org/W6603010030","https://openalex.org/W6670718873","https://openalex.org/W6680106728"],"related_works":["https://openalex.org/W2562683361","https://openalex.org/W3087469195","https://openalex.org/W2354744388","https://openalex.org/W4386075131","https://openalex.org/W2141370774","https://openalex.org/W2950451697","https://openalex.org/W4302330868","https://openalex.org/W4280511762","https://openalex.org/W2111071896","https://openalex.org/W1496716358"],"abstract_inverted_index":{"Massively":[0],"parallel":[1],"DNA":[2],"sequencing":[3],"technologies":[4],"are":[5],"revolutionizing":[6],"genomics":[7],"research.":[8],"Billions":[9],"of":[10,30,58,75,145,156,173,183],"short":[11,108,175],"reads":[12,109],"generated":[13],"at":[14],"low":[15],"costs":[16],"can":[17,119,193],"be":[18,120],"assembled":[19],"for":[20,45,78,202],"reconstructing":[21],"the":[22,26,31,42,55,94,107,139,142,171,174,181],"whole":[23],"genomes.":[24,80],"Unfortunately,":[25],"large":[27,79],"memory":[28,56,77],"footprint":[29],"existing":[32],"de":[33,60,134,195],"novo":[34],"assembly":[35,43,68],"algorithms":[36],"makes":[37],"it":[38],"challenging":[39],"to":[40,92,131,164],"get":[41],"done":[44],"higher":[46],"eukaryotes":[47],"like":[48],"mammals.":[49],"In":[50],"this":[51],"work,":[52],"we":[53],"investigate":[54],"issue":[57],"constructing":[59],"Bruijn":[61,135,196],"graph,":[62],"a":[63,83,133,184,199],"core":[64],"task":[65,95],"in":[66],"leading":[67],"algorithms,":[69],"which":[70],"often":[71],"consumes":[72],"several":[73],"hundreds":[74],"gigabytes":[76,100],"We":[81],"propose":[82],"disk-based":[84],"partition":[85,118],"method,":[86],"called":[87],"Minimum":[88],"Substring":[89],"Partitioning":[90],"(MSP),":[91],"complete":[93],"using":[96,198],"less":[97],"than":[98],"10":[99],"memory,":[101,123],"without":[102],"runtime":[103],"slowdown.":[104],"MSP":[105,148],"breaks":[106],"into":[110,122],"multiple":[111],"small":[112],"disjoint":[113],"partitions":[114,157],"so":[115],"that":[116,190],"each":[117],"loaded":[121],"processed":[124],"individually":[125],"and":[126,178],"later":[127],"merged":[128],"with":[129],"others":[130],"form":[132],"graph.":[136],"By":[137],"leveraging":[138],"overlaps":[140],"among":[141],"k-mers":[143],"(substring":[144],"length":[146,182],"k),":[147],"achieves":[149],"astonishing":[150],"compression":[151],"ratio:":[152],"The":[153],"total":[154],"size":[155,172],"is":[158,170,180],"reduced":[159],"from":[160],"\u0398(":[161,165],"kn":[162],")":[163],"n":[166,169],"),":[167],"where":[168],"read":[176],"database,":[177],"k":[179,185],"-mer.":[186],"Experimental":[187],"results":[188],"show":[189],"our":[191],"method":[192],"build":[194],"graphs":[197],"commodity":[200],"computer":[201],"any":[203],"large-volume":[204],"sequence":[205],"dataset.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
