{"id":"https://openalex.org/W2056095876","doi":"https://doi.org/10.1109/bigdata.2014.7004293","title":"Combining Hadoop and GPU to preprocess large Affymetrix microarray data","display_name":"Combining Hadoop and GPU to preprocess large Affymetrix microarray data","publication_year":2014,"publication_date":"2014-10-01","ids":{"openalex":"https://openalex.org/W2056095876","doi":"https://doi.org/10.1109/bigdata.2014.7004293","mag":"2056095876"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2014.7004293","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004293","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010348994","display_name":"Sufeng Niu","orcid":"https://orcid.org/0000-0002-2826-0301"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sufeng Niu","raw_affiliation_strings":["Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101866860","display_name":"Guangyu Yang","orcid":"https://orcid.org/0000-0002-3612-8618"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangyu Yang","raw_affiliation_strings":["School of Computing, Clemson University, Clemson, SC, USA","School of Computing, Clemson University, Clemson, SC 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027452675","display_name":"Nilim Sarma","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nilim Sarma","raw_affiliation_strings":["Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108111002","display_name":"Pengfei Xuan","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pengfei Xuan","raw_affiliation_strings":["School of Computing, Clemson University, Clemson, SC, USA","School of Computing, Clemson University, Clemson, SC 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043923029","display_name":"Melissa C. Smith","orcid":"https://orcid.org/0000-0003-0798-8536"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Melissa C. Smith","raw_affiliation_strings":["Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"Holcombe Department of Electrical and Computer Engineering, Clemson University, Clemson, SC, 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047296304","display_name":"Pradip K. Srimani","orcid":null},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pradip Srimani","raw_affiliation_strings":["School of Computing, Clemson University, Clemson, SC, USA","School of Computing, Clemson University, Clemson, SC 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100683466","display_name":"Feng Luo","orcid":"https://orcid.org/0000-0002-4813-2403"},"institutions":[{"id":"https://openalex.org/I8078737","display_name":"Clemson University","ror":"https://ror.org/037s24f05","country_code":"US","type":"education","lineage":["https://openalex.org/I8078737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feng Luo","raw_affiliation_strings":["School of Computing, Clemson University, Clemson, SC, USA","School of Computing, Clemson University, Clemson, SC 29634 USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC, USA","institution_ids":["https://openalex.org/I8078737"]},{"raw_affiliation_string":"School of Computing, Clemson University, Clemson, SC 29634 USA","institution_ids":["https://openalex.org/I8078737"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5693,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.67068527,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"11","issue":null,"first_page":"692","last_page":"700"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9768999814987183,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8157804608345032},{"id":"https://openalex.org/keywords/microarray-databases","display_name":"Microarray databases","score":0.7428528070449829},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7250839471817017},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5852275490760803},{"id":"https://openalex.org/keywords/microarray-analysis-techniques","display_name":"Microarray analysis techniques","score":0.5839333534240723},{"id":"https://openalex.org/keywords/gene-chip-analysis","display_name":"Gene chip analysis","score":0.5493997931480408},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5028733611106873},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4970689117908478},{"id":"https://openalex.org/keywords/microarray","display_name":"Microarray","score":0.47941848635673523},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.452620267868042},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4502328634262085},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4427454471588135},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4375761151313782},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3972734808921814},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2303503453731537},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20998653769493103},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.07341328263282776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8157804608345032},{"id":"https://openalex.org/C548314002","wikidata":"https://www.wikidata.org/wiki/Q6839218","display_name":"Microarray databases","level":5,"score":0.7428528070449829},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7250839471817017},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5852275490760803},{"id":"https://openalex.org/C8415881","wikidata":"https://www.wikidata.org/wiki/Q6839217","display_name":"Microarray analysis techniques","level":4,"score":0.5839333534240723},{"id":"https://openalex.org/C24361400","wikidata":"https://www.wikidata.org/wiki/Q6839217","display_name":"Gene chip analysis","level":5,"score":0.5493997931480408},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5028733611106873},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4970689117908478},{"id":"https://openalex.org/C186836561","wikidata":"https://www.wikidata.org/wiki/Q1931516","display_name":"Microarray","level":4,"score":0.47941848635673523},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.452620267868042},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4502328634262085},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4427454471588135},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4375761151313782},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3972734808921814},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2303503453731537},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20998653769493103},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.07341328263282776},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2014.7004293","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004293","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1309515","https://openalex.org/W1118098720","https://openalex.org/W1678662003","https://openalex.org/W1863819094","https://openalex.org/W1970156673","https://openalex.org/W1984222112","https://openalex.org/W1988425770","https://openalex.org/W2020541351","https://openalex.org/W2040513965","https://openalex.org/W2060312180","https://openalex.org/W2107911628","https://openalex.org/W2108157916","https://openalex.org/W2116634113","https://openalex.org/W2120865735","https://openalex.org/W2122166096","https://openalex.org/W2129817042","https://openalex.org/W2130383124","https://openalex.org/W2130494035","https://openalex.org/W2159096710","https://openalex.org/W2170989872","https://openalex.org/W2173213060","https://openalex.org/W2260238747","https://openalex.org/W2290323698","https://openalex.org/W4229666556","https://openalex.org/W4238202755","https://openalex.org/W4247420482","https://openalex.org/W6696490886"],"related_works":["https://openalex.org/W2151046618","https://openalex.org/W1972148443","https://openalex.org/W1969233021","https://openalex.org/W2167646277","https://openalex.org/W2063573318","https://openalex.org/W2388314963","https://openalex.org/W3158047141","https://openalex.org/W1656096860","https://openalex.org/W2027443981","https://openalex.org/W2360624069"],"abstract_inverted_index":{"High":[0],"density":[1],"oligonucleotide":[2],"array":[3],"(microarray)":[4],"from":[5],"Affymetrix":[6],"has":[7],"been":[8],"widely":[9],"used":[10],"for":[11,31,159],"the":[12,28,58,106,113,150,185],"measurements":[13],"of":[14,27,39,45,57,74,76,83,100,105,130,141,152],"gene":[15],"expressions.":[16],"Currently,":[17],"public":[18,186],"data":[19,48,109,183],"repositories,":[20],"such":[21],"as":[22],"Gene":[23],"Expression":[24],"Omnibus":[25],"(GEO)":[26],"National":[29],"Center":[30],"Biotechnology":[32],"Information":[33],"(NCBI),":[34],"have":[35],"accumulated":[36],"large":[37,69,126,135,181],"amounts":[38],"microarray":[40,47,60,70,84,127,136,182],"data.":[41,128],"Efficient":[42],"integrative":[43],"analysis":[44],"those":[46],"will":[49,174],"provide":[50],"significant":[51],"knowledge":[52],"about":[53],"biological":[54],"systems.":[55],"None":[56],"existing":[59],"preprocessing":[61,79],"and":[62,80,89,112,154,167],"quality":[63,81],"assessment":[64,82],"tools":[65,101,133],"can":[66],"handle":[67],"very":[68],"datasets":[71,85,137],"with":[72,138],"tens":[73],"thousands":[75,140],"experiments.":[77],"The":[78],"contain":[86,164],"both":[87,165],"data-intensive":[88,166],"compute-intensive":[90,168],"tasks.":[91,169],"In":[92],"this":[93],"paper,":[94],"we":[95],"develop":[96],"a":[97,103],"new":[98,132,171],"set":[99,173],"using":[102],"mix":[104],"Hadoop":[107,153],"(for":[108,119],"intensive":[110,121],"tasks)":[111,122],"General-Purpose":[114],"Graphics":[115],"Processing":[116],"Units":[117],"(GPGPUs)":[118],"compute":[120],"to":[123,178],"efficiently":[124],"process":[125],"Evaluation":[129],"our":[131],"on":[134],"ten":[139],"experiments":[142],"showed":[143],"promising":[144],"superior":[145],"performance.":[146],"We":[147],"demonstrate":[148],"that":[149,163],"combination":[151],"GPGPU":[155],"computation":[156],"is":[157],"effective":[158],"complex":[160],"scientific":[161],"applications":[162],"Our":[170],"tool":[172],"make":[175],"it":[176],"possible":[177],"utilize":[179],"valuable":[180],"in":[184],"repositories.":[187]},"counts_by_year":[{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
