{"id":"https://openalex.org/W4206766548","doi":"https://doi.org/10.1109/bibm52615.2021.9669512","title":"scSpark<sup>XMBD</sup>: High-Performance scRNA-seq Data Processing with Spark","display_name":"scSpark<sup>XMBD</sup>: High-Performance scRNA-seq Data Processing with Spark","publication_year":2021,"publication_date":"2021-12-09","ids":{"openalex":"https://openalex.org/W4206766548","doi":"https://doi.org/10.1109/bibm52615.2021.9669512"},"language":"en","primary_location":{"id":"doi:10.1109/bibm52615.2021.9669512","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm52615.2021.9669512","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100345633","display_name":"Yu Liu","orcid":"https://orcid.org/0000-0001-7110-127X"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Liu","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103072772","display_name":"Mingxuan Gao","orcid":"https://orcid.org/0000-0002-6619-6507"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingxuan Gao","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005411927","display_name":"Lixuan Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lixuan Tan","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044426459","display_name":"Hongjin Liu","orcid":"https://orcid.org/0000-0001-5625-2395"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjin Liu","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074971639","display_name":"Yating Lin","orcid":"https://orcid.org/0000-0001-5061-5132"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yating Lin","raw_affiliation_strings":["School of Informatics, Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062411566","display_name":"Wenxian Yang","orcid":"https://orcid.org/0000-0002-5349-9680"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenxian Yang","raw_affiliation_strings":["Aginome Scientific, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Aginome Scientific, Xiamen, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068270780","display_name":"Rongshan Yu","orcid":"https://orcid.org/0000-0003-2179-173X"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongshan Yu","raw_affiliation_strings":["School of Informatics, National Institute for Data Science in Health and Medicine Xiamen University, Xiamen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, National Institute for Data Science in Health and Medicine Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5203,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59217877,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"33","issue":null,"first_page":"1956","last_page":"1962"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11287","display_name":"Cancer Genomics and Diagnostics","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7347502708435059},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.7320106029510498},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6537061333656311},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6417832970619202},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.5659109950065613},{"id":"https://openalex.org/keywords/data-processing","display_name":"Data processing","score":0.5496011972427368},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5051653981208801},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.46617555618286133},{"id":"https://openalex.org/keywords/barcode","display_name":"Barcode","score":0.41889792680740356},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.31266486644744873},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.23037365078926086},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18398886919021606},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.16604265570640564},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14678961038589478},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09197694063186646}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7347502708435059},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.7320106029510498},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6537061333656311},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6417832970619202},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.5659109950065613},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.5496011972427368},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5051653981208801},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.46617555618286133},{"id":"https://openalex.org/C2776841711","wikidata":"https://www.wikidata.org/wiki/Q856","display_name":"Barcode","level":2,"score":0.41889792680740356},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31266486644744873},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.23037365078926086},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18398886919021606},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.16604265570640564},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14678961038589478},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09197694063186646},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm52615.2021.9669512","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm52615.2021.9669512","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2102278945","https://openalex.org/W2104247393","https://openalex.org/W2108234281","https://openalex.org/W2117608012","https://openalex.org/W2121180999","https://openalex.org/W2131975293","https://openalex.org/W2133233871","https://openalex.org/W2160363604","https://openalex.org/W2169456326","https://openalex.org/W2189465200","https://openalex.org/W2398924165","https://openalex.org/W2504388521","https://openalex.org/W2886161096","https://openalex.org/W2951506174","https://openalex.org/W2952109315","https://openalex.org/W2952630931","https://openalex.org/W2953170162","https://openalex.org/W2993265460","https://openalex.org/W3013605959","https://openalex.org/W3041512494","https://openalex.org/W6675338037","https://openalex.org/W6679815717","https://openalex.org/W6687322159","https://openalex.org/W6724773327","https://openalex.org/W6771191006"],"related_works":["https://openalex.org/W2256570403","https://openalex.org/W2074156223","https://openalex.org/W2103246366","https://openalex.org/W4391927655","https://openalex.org/W2901653204","https://openalex.org/W1980504858","https://openalex.org/W2102526470","https://openalex.org/W2613659923","https://openalex.org/W4378447789","https://openalex.org/W2126622512"],"abstract_inverted_index":{"High-throughput":[0],"single-cell":[1],"RNA":[2],"sequencing":[3],"(scRNA-seq)":[4],"data":[5,15,36,42,71,167],"processing":[6,43,72,87,101,128,168],"pipelines":[7,102],"integrate":[8],"multiple":[9,144],"modules":[10],"to":[11,16,50,112,141,175],"transform":[12],"raw":[13],"scRNA-seq":[14,41,52,70,90,133,166],"gene":[17],"expression":[18],"matrices,":[19],"including":[20],"barcode":[21],"processing,":[22],"sequence":[23],"quality":[24],"control,":[25],"genome":[26],"alignment":[27],"and":[28,92,195],"transcript":[29],"quantification.":[30],"With":[31],"the":[32,38,76,86,99,104,113,127,156,187],"rapid":[33],"growth":[34],"in":[35,118,186,193],"volume,":[37],"speed":[39,88],"of":[40,80,89,116],"pipeline":[44],"has":[45],"become":[46],"a":[47,66,119,131,181],"major":[48],"bottleneck":[49],"large-scale":[51],"studies.":[53],"We":[54],"present":[55],"scSpark":[56,83,123,160],"<sup":[57,60,170],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[58,61,171],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">XMBD</sup>":[59],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[62,172],"(denoted":[63],"as":[64],"scSpark),":[65],"cloud":[67,120],"computing":[68,78,121],"based":[69],"pipeline.":[73],"By":[74],"leveraging":[75],"in-memory":[77],"capability":[79],"Apache":[81],"Spark,":[82],"significantly":[84],"improves":[85],"data,":[91],"achieves":[93],"around":[94],"5-20":[95],"times":[96],"faster":[97],"than":[98],"state-of-the-art":[100],"under":[103],"same":[105],"CPU":[106],"core":[107],"consumption.":[108],"In":[109],"addition,":[110],"thanks":[111],"inherent":[114],"scalability":[115],"Spark":[117],"environment,":[122],"can":[124],"further":[125],"reduce":[126],"time":[129],"for":[130,190],"typical":[132],"dataset":[134],"(e.g.,":[135,147],"640":[136],"million":[137],"reads)":[138],"from":[139],"hours":[140],"minutes":[142],"when":[143],"computer":[145],"nodes":[146],"16)":[148],"are":[149,161],"used.":[150],"Biological":[151],"evaluation":[152],"also":[153],"confirmed":[154],"that":[155],"results":[157],"generated":[158],"by":[159],"highly":[162],"consistent":[163],"with":[164],"existing":[165],"pipelines.":[169],"XMBD":[173],"refers":[174],"Xiamen":[176,197],"Big":[177],"Data,":[178],"which":[179],"is":[180],"biomedical":[182],"open":[183],"software":[184],"initiative":[185],"National":[188],"Institute":[189],"Data":[191],"Science":[192],"Health":[194],"Medicine,":[196],"University,":[198],"China":[199]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
