{"id":"https://openalex.org/W4243336146","doi":"https://doi.org/10.1109/sc.2008.5214891","title":"An efficient parallel approach for identifying protein families in large-scale metagenomic data sets","display_name":"An efficient parallel approach for identifying protein families in large-scale metagenomic data sets","publication_year":2008,"publication_date":"2008-11-01","ids":{"openalex":"https://openalex.org/W4243336146","doi":"https://doi.org/10.1109/sc.2008.5214891"},"language":"en","primary_location":{"id":"doi:10.1109/sc.2008.5214891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2008.5214891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110069965","display_name":"Changjun Wu","orcid":"https://orcid.org/0009-0007-3557-1240"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Changjun Wu","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Washington State University, Pullman, WA, USA"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Washington State University, Pullman, WA, USA","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048410309","display_name":"Ananth Kalyanaraman","orcid":"https://orcid.org/0000-0001-6721-233X"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ananth Kalyanaraman","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, Washington State University, Pullman, WA, USA"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, Washington State University, Pullman, WA, USA","institution_ids":["https://openalex.org/I72951846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5110069965"],"corresponding_institution_ids":["https://openalex.org/I72951846"],"apc_list":null,"apc_paid":null,"fwci":0.2693,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.61152209,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7757053375244141},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7748972773551941},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.7694891691207886},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7307381629943848},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.560093104839325},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5157108902931213},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.48553797602653503},{"id":"https://openalex.org/keywords/bipartite-graph","display_name":"Bipartite graph","score":0.4667067527770996},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4332338571548462},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4199538826942444},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4167459011077881},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3951868414878845},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36805495619773865},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3185325860977173},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2190420925617218},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.143498957157135},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.12122282385826111},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.10974720120429993},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.0952301025390625}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7757053375244141},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7748972773551941},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.7694891691207886},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7307381629943848},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.560093104839325},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5157108902931213},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.48553797602653503},{"id":"https://openalex.org/C197657726","wikidata":"https://www.wikidata.org/wiki/Q174733","display_name":"Bipartite graph","level":3,"score":0.4667067527770996},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4332338571548462},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4199538826942444},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4167459011077881},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3951868414878845},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36805495619773865},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3185325860977173},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2190420925617218},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.143498957157135},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12122282385826111},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.10974720120429993},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.0952301025390625},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sc.2008.5214891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2008.5214891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Life in Land","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W159602724","https://openalex.org/W1604983895","https://openalex.org/W1635391495","https://openalex.org/W1791999417","https://openalex.org/W1984374364","https://openalex.org/W1986022261","https://openalex.org/W2036662864","https://openalex.org/W2036836182","https://openalex.org/W2074231493","https://openalex.org/W2075716829","https://openalex.org/W2081193615","https://openalex.org/W2087064593","https://openalex.org/W2093830129","https://openalex.org/W2099946731","https://openalex.org/W2108758729","https://openalex.org/W2109867978","https://openalex.org/W2111373249","https://openalex.org/W2113601822","https://openalex.org/W2121252285","https://openalex.org/W2122232025","https://openalex.org/W2124871329","https://openalex.org/W2126809954","https://openalex.org/W2134122907","https://openalex.org/W2145336165","https://openalex.org/W2147783737","https://openalex.org/W2161189461","https://openalex.org/W2164429509","https://openalex.org/W2584945553","https://openalex.org/W4210400672","https://openalex.org/W4230903726","https://openalex.org/W4236236547","https://openalex.org/W4255852734","https://openalex.org/W6606539241","https://openalex.org/W6636143309","https://openalex.org/W6680132782"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1987128138","https://openalex.org/W2743976221"],"abstract_inverted_index":{"Metagenomics":[0],"is":[1,44],"the":[2,19,46,64,89,141],"study":[3],"of":[4,21,24,32,48,68,84,93,115,129,149],"environmental":[5,143],"microbial":[6],"communities":[7],"using":[8,146],"state-of-the-art":[9],"genomic":[10],"tools.":[11],"Recent":[12],"advancements":[13],"in":[14,45],"high-throughput":[15],"technologies":[16],"have":[17],"enabled":[18],"accumulation":[20],"large":[22],"volumes":[23],"metagenomic":[25,79],"data":[26,57],"that":[27],"was":[28,35],"until":[29],"a":[30,69,82,108,113,150],"couple":[31],"years":[33],"back":[34],"deemed":[36],"impractical":[37],"for":[38,55],"generation.":[39],"A":[40],"primary":[41],"bottleneck,":[42],"however,":[43],"lack":[47],"scalable":[49],"algorithms":[50],"and":[51,66,117,126],"open":[52],"source":[53],"software":[54],"large-scale":[56,78],"processing.":[58],"In":[59],"this":[60,105],"paper,":[61],"we":[62,87],"present":[63,124],"design":[65],"implementation":[67,133],"novel":[70],"parallel":[71],"approach":[72,102],"to":[73,91],"identify":[74],"protein":[75],"families":[76],"from":[77,98,140],"data.":[80],"Given":[81],"set":[83],"peptide":[85],"sequences":[86,139],"reduce":[88],"problem":[90],"one":[92],"detecting":[94],"arbitrarily-sized":[95],"dense":[96],"subgraphs":[97],"bipartite":[99],"graphs.":[100],"Our":[101],"efficiently":[103],"parallelizes":[104],"task":[106],"on":[107,134],"distributed":[109],"memory":[110],"machine":[111],"through":[112],"combination":[114],"divide-and-conquer":[116],"combinatorial":[118],"pattern":[119],"matching":[120],"heuristic":[121],"techniques.":[122],"We":[123],"performance":[125],"quality":[127],"results":[128],"extensively":[130],"testing":[131],"our":[132],"160":[135],"K":[136],"randomly":[137],"sampled":[138],"CAMERA":[142],"sequence":[144],"database":[145],"512":[147],"nodes":[148],"BlueGene/L":[151],"supercomputer.":[152]},"counts_by_year":[{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
