{"id":"https://openalex.org/W2602978558","doi":"https://doi.org/10.1186/s12859-017-1466-6","title":"A framework for space-efficient read clustering in metagenomic samples","display_name":"A framework for space-efficient read clustering in metagenomic samples","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2602978558","doi":"https://doi.org/10.1186/s12859-017-1466-6","mag":"2602978558"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-017-1466-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-017-1466-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-017-1466-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-017-1466-6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067903348","display_name":"Jarno Alanko","orcid":"https://orcid.org/0000-0002-8003-9225"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jarno Alanko","raw_affiliation_strings":["Department of Computer Science, University of Helsinki, Gustaf H\u00e4llstr\u00f6min katu 2b, Helsinki, 00560, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki, Gustaf H\u00e4llstr\u00f6min katu 2b, Helsinki, 00560, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007639641","display_name":"Fabio Cunial","orcid":"https://orcid.org/0000-0003-0282-5738"},"institutions":[{"id":"https://openalex.org/I4210159854","display_name":"Max Planck Institute of Molecular Cell Biology and Genetics","ror":"https://ror.org/05b8d3w18","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210159854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Fabio Cunial","raw_affiliation_strings":["Max Planck Institute for Molecular Cell Biology and Genetics, Pfotenhauerstr. 108, Dresden, 01307, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Molecular Cell Biology and Genetics, Pfotenhauerstr. 108, Dresden, 01307, Germany","institution_ids":["https://openalex.org/I4210159854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059502958","display_name":"Djamal Belazzougui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095777","display_name":"Centre de Recherche sur l'Information Scientifique et Technique","ror":"https://ror.org/01k1bte55","country_code":"DZ","type":"government","lineage":["https://openalex.org/I4210095777","https://openalex.org/I4210114810"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Djamal Belazzougui","raw_affiliation_strings":["DTISI, CERIST (Research Centre for Scientific and Technical Information), Rue des 3 Fr\u00e9res Aissou, Algiers, 16306, Algeria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DTISI, CERIST (Research Centre for Scientific and Technical Information), Rue des 3 Fr\u00e9res Aissou, Algiers, 16306, Algeria","institution_ids":["https://openalex.org/I4210095777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043558409","display_name":"Veli M\u00e4kinen","orcid":"https://orcid.org/0000-0003-4454-1493"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Veli M\u00e4kinen","raw_affiliation_strings":["Department of Computer Science, University of Helsinki, Gustaf H\u00e4llstr\u00f6min katu 2b, Helsinki, 00560, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki, Gustaf H\u00e4llstr\u00f6min katu 2b, Helsinki, 00560, Finland","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":1.0327,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82563806,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"18","issue":"S3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9628000259399414,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.8627132177352905},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6339854001998901},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5763095617294312},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5503338575363159},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4872225821018219},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.4561029076576233},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4064887464046478},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3935452401638031},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35248318314552307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2069770097732544},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.1875424087047577},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.1210569441318512}],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.8627132177352905},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6339854001998901},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5763095617294312},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5503338575363159},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4872225821018219},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.4561029076576233},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4064887464046478},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3935452401638031},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35248318314552307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2069770097732544},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.1875424087047577},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.1210569441318512},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1186/s12859-017-1466-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-017-1466-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-017-1466-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmh:oai:helda.helsinki.fi:10138/177550","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/177550","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"http://purl.org/eprint/entityType/ScholarlyWork"},{"id":"pmh:oai:helda.helsinki.fi:10138/182782","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/182782","pdf_url":null,"source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:pubmedcentral.nih.gov:5374685","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/5374685","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"},{"id":"pmh:oai:pure.mpg.de:item_2642423","is_oa":false,"landing_page_url":"http://hdl.handle.net/21.11116/0000-0002-8B5A-D","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC bioinformatics","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1186/s12859-017-1466-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-017-1466-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-017-1466-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G2986225747","display_name":null,"funder_award_id":"284598","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"}],"funders":[{"id":"https://openalex.org/F4320321108","display_name":"Academy of Finland","ror":"https://ror.org/05k73zm37"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2602978558.pdf","grobid_xml":"https://content.openalex.org/works/W2602978558.grobid-xml"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W820014006","https://openalex.org/W1509727978","https://openalex.org/W1531368347","https://openalex.org/W1549037892","https://openalex.org/W1962019683","https://openalex.org/W1963979462","https://openalex.org/W1983048808","https://openalex.org/W1989348205","https://openalex.org/W1992715872","https://openalex.org/W1997673861","https://openalex.org/W2048818637","https://openalex.org/W2057197272","https://openalex.org/W2057215967","https://openalex.org/W2058126760","https://openalex.org/W2061474902","https://openalex.org/W2082203850","https://openalex.org/W2082650848","https://openalex.org/W2086291326","https://openalex.org/W2086536051","https://openalex.org/W2090079179","https://openalex.org/W2097341408","https://openalex.org/W2100673218","https://openalex.org/W2103215646","https://openalex.org/W2103901746","https://openalex.org/W2106985417","https://openalex.org/W2113915152","https://openalex.org/W2116041602","https://openalex.org/W2119279650","https://openalex.org/W2120680184","https://openalex.org/W2125826054","https://openalex.org/W2129463378","https://openalex.org/W2134696992","https://openalex.org/W2137604316","https://openalex.org/W2142942734","https://openalex.org/W2150208009","https://openalex.org/W2152766058","https://openalex.org/W2155563453","https://openalex.org/W2156995746","https://openalex.org/W2159591897","https://openalex.org/W2162039942","https://openalex.org/W2162210683","https://openalex.org/W2163830511","https://openalex.org/W2164327068","https://openalex.org/W2165520746","https://openalex.org/W2183776810","https://openalex.org/W2406547600","https://openalex.org/W2533248932","https://openalex.org/W2589084350","https://openalex.org/W2616224760","https://openalex.org/W2752885492","https://openalex.org/W2949430765","https://openalex.org/W2952928811","https://openalex.org/W3145128584","https://openalex.org/W4231553787","https://openalex.org/W4231842606","https://openalex.org/W4233182706","https://openalex.org/W4246219036","https://openalex.org/W6679642144"],"related_works":["https://openalex.org/W638577851","https://openalex.org/W2968354375","https://openalex.org/W2359440920","https://openalex.org/W2889550857","https://openalex.org/W3013372948","https://openalex.org/W3135997498","https://openalex.org/W4242417330","https://openalex.org/W3198631238","https://openalex.org/W4388282505","https://openalex.org/W3044162010"],"abstract_inverted_index":{"A":[0],"metagenomic":[1,24,30,72],"sample":[2,31,94,148],"is":[3,140,151],"a":[4,29,59,65,81,93,141,175],"set":[5,87],"of":[6,67,88,95,102,110,125,143,146,155,178,195],"DNA":[7],"fragments,":[8],"randomly":[9],"extracted":[10],"from":[11],"multiple":[12,172],"cells":[13],"in":[14,70,127,187,190],"an":[15,108],"environment,":[16],"belonging":[17],"to":[18,129,133],"distinct,":[19],"often":[20],"unknown":[21],"species.":[22],"Unsupervised":[23],"clustering":[25,50,73],"aims":[26],"at":[27],"partitioning":[28],"into":[32],"sets":[33],"that":[34,63,164],"approximate":[35],"taxonomic":[36],"units,":[37],"without":[38],"using":[39,74],"reference":[40],"genomes.":[41],"Since":[42],"samples":[43],"are":[44,52,167,184],"large":[45],"and":[46,57,80,119,132,149,182,189],"steadily":[47],"growing,":[48],"space-efficient":[49,60],"algorithms":[51,115,166],"strongly":[53],"needed.":[54],"We":[55],"design":[56],"implement":[58],"algorithmic":[61],"framework":[62],"solves":[64],"number":[66],"core":[68],"primitives":[69],"unsupervised":[71],"just":[75,120],"the":[76,86,130,134,144,147,152,156,179,193,196],"bidirectional":[77],"Burrows-Wheeler":[78],"index":[79,131],"union-find":[82,135,157],"data":[83,136,158],"structure":[84],"on":[85,92,107],"reads.":[89],"When":[90],"run":[91],"total":[96,111],"length":[97,104],"n,":[98],"with":[99,192],"m":[100],"reads":[101],"maximum":[103],"\u2113":[105],"each,":[106],"alphabet":[109],"size":[112],"\u03c3,":[113],"our":[114,165],"take":[116],"O(n(t+log\u03c3))":[117],"time":[118,154,191],"2n+o(n)+O(max{\u2113":[121],"\u03c3logn,K":[122],"logm})":[123],"bits":[124],"space":[126,188],"addition":[128],"structure,":[137],"where":[138],"K":[139],"measure":[142],"redundancy":[145],"t":[150],"query":[153],"structure.":[159],"Our":[160],"experimental":[161],"results":[162],"show":[163],"practical,":[168],"they":[169,183],"can":[170],"exploit":[171],"cores":[173],"by":[174],"parallel":[176],"traversal":[177],"suffix-link":[180],"tree,":[181],"competitive":[185],"both":[186],"state":[194],"art.":[197]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
