{"id":"https://openalex.org/W2149059931","doi":"https://doi.org/10.1093/bioinformatics/btt528","title":"BioPig: a Hadoop-based analytic toolkit for large-scale sequence data","display_name":"BioPig: a Hadoop-based analytic toolkit for large-scale sequence data","publication_year":2013,"publication_date":"2013-09-10","ids":{"openalex":"https://openalex.org/W2149059931","doi":"https://doi.org/10.1093/bioinformatics/btt528","mag":"2149059931","pmid":"https://pubmed.ncbi.nlm.nih.gov/24021384"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btt528","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btt528","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021637804","display_name":"Henrik Nordberg","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I196679689","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I196679689","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Henrik Nordberg","raw_affiliation_strings":["1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA","institution_ids":["https://openalex.org/I196679689","https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033573275","display_name":"Karan Bhatia","orcid":"https://orcid.org/0000-0002-1978-228X"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I196679689","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I196679689","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karan Bhatia","raw_affiliation_strings":["1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA","institution_ids":["https://openalex.org/I196679689","https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101518846","display_name":"Kai\u2010Xue Wang","orcid":"https://orcid.org/0000-0002-2076-5487"},"institutions":[{"id":"https://openalex.org/I196679689","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I196679689","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kai Wang","raw_affiliation_strings":["1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA","institution_ids":["https://openalex.org/I196679689","https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100781872","display_name":"Zhong Wang","orcid":"https://orcid.org/0000-0002-6307-0458"},"institutions":[{"id":"https://openalex.org/I196679689","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I196679689","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhong Wang","raw_affiliation_strings":["1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Energy, Joint Genome Institute, Walnut Creek, CA 94598, USA and 2Genomics Division, Lawrence Berkeley National Laboratory, Berkeley, CA 94720, USA","institution_ids":["https://openalex.org/I196679689","https://openalex.org/I148283060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100781872"],"corresponding_institution_ids":["https://openalex.org/I148283060","https://openalex.org/I196679689"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":5.8027,"has_fulltext":false,"cited_by_count":114,"citation_normalized_percentile":{"value":0.97010538,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"29","issue":"23","first_page":"3014","last_page":"3019"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.8675049543380737},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.808646559715271},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6929999589920044},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.6089429259300232},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.544899046421051},{"id":"https://openalex.org/keywords/license","display_name":"License","score":0.5379655361175537},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.49996376037597656},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4627769887447357},{"id":"https://openalex.org/keywords/mit-license","display_name":"MIT License","score":0.448760062456131},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.428725928068161},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38887274265289307},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30826669931411743},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.22426146268844604}],"concepts":[{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.8675049543380737},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.808646559715271},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6929999589920044},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.6089429259300232},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.544899046421051},{"id":"https://openalex.org/C2780560020","wikidata":"https://www.wikidata.org/wiki/Q79719","display_name":"License","level":2,"score":0.5379655361175537},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.49996376037597656},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4627769887447357},{"id":"https://openalex.org/C174183944","wikidata":"https://www.wikidata.org/wiki/Q334661","display_name":"MIT License","level":3,"score":0.448760062456131},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.428725928068161},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38887274265289307},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30826669931411743},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.22426146268844604},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/btt528","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btt528","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:24021384","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/24021384","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320316170","display_name":"Joint Genome Institute","ror":"https://ror.org/04xm1d337"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1517480469","https://openalex.org/W1964106002","https://openalex.org/W1969346416","https://openalex.org/W1990073900","https://openalex.org/W2011411346","https://openalex.org/W2017928178","https://openalex.org/W2037444377","https://openalex.org/W2081294880","https://openalex.org/W2108234281","https://openalex.org/W2109701112","https://openalex.org/W2117582473","https://openalex.org/W2118526609","https://openalex.org/W2119180969","https://openalex.org/W2121762798","https://openalex.org/W2131975293","https://openalex.org/W2150772374","https://openalex.org/W2154462802","https://openalex.org/W2160363604","https://openalex.org/W2160969485","https://openalex.org/W2171777347","https://openalex.org/W2173213060","https://openalex.org/W2342204193","https://openalex.org/W2885727518","https://openalex.org/W2912516925","https://openalex.org/W3129170862","https://openalex.org/W6645048466","https://openalex.org/W6679815717"],"related_works":["https://openalex.org/W2036021480","https://openalex.org/W2546377002","https://openalex.org/W4285209474","https://openalex.org/W2046541848","https://openalex.org/W22571951","https://openalex.org/W4384009753","https://openalex.org/W2791776396","https://openalex.org/W4286531993","https://openalex.org/W2964820744","https://openalex.org/W2249393407"],"abstract_inverted_index":{"We":[0],"built":[1],"BioPig":[2,22,40,58,88],"on":[3,64],"the":[4,10,81,95],"Apache's":[5],"Hadoop":[6,66],"MapReduce":[7],"system":[8,72],"and":[9,19,56,80],"Pig":[11],"data":[12],"flow":[13],"language.":[14],"Compared":[15],"with":[16,41,52,70,94],"traditional":[17],"serial":[18],"MPI-based":[20],"algorithms,":[21],"has":[23],"three":[24],"major":[25],"advantages:":[26],"first,":[27],"BioPig's":[28],"programmability":[29],"greatly":[30,98],"reduces":[31],"development":[32],"time":[33],"for":[34],"parallel":[35],"bioinformatics":[36,101],"applications;":[37],"second,":[38],"testing":[39],"up":[42],"to":[43,97],"500":[44],"Gb":[45],"sequences":[46],"demonstrates":[47],"that":[48],"it":[49],"scales":[50],"automatically":[51],"size":[53],"of":[54],"data;":[55],"finally,":[57],"can":[59],"be":[60],"ported":[61],"without":[62],"modification":[63],"many":[65],"infrastructures,":[67],"as":[68],"tested":[69],"Magellan":[71],"at":[73],"National":[74],"Energy":[75],"Research":[76],"Scientific":[77],"Computing":[78],"Center":[79],"Amazon":[82],"Elastic":[83],"Compute":[84],"Cloud.":[85],"In":[86],"summary,":[87],"represents":[89],"a":[90],"novel":[91],"program":[92],"framework":[93],"potential":[96],"accelerate":[99],"data-intensive":[100],"analysis.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":12},{"year":2017,"cited_by_count":24},{"year":2016,"cited_by_count":13},{"year":2015,"cited_by_count":11},{"year":2014,"cited_by_count":14},{"year":2013,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
