{"id":"https://openalex.org/W1992558790","doi":"https://doi.org/10.1109/ipdpsw.2012.300","title":"Optimizing Data Warehousing Applications for GPUs Using Kernel Fusion/Fission","display_name":"Optimizing Data Warehousing Applications for GPUs Using Kernel Fusion/Fission","publication_year":2012,"publication_date":"2012-05-01","ids":{"openalex":"https://openalex.org/W1992558790","doi":"https://doi.org/10.1109/ipdpsw.2012.300","mag":"1992558790"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw.2012.300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2012.300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops &amp; PhD Forum","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003741499","display_name":"Haicheng Wu","orcid":"https://orcid.org/0000-0003-0357-9049"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haicheng Wu","raw_affiliation_strings":["School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006631932","display_name":"Gregory Diamos","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory Diamos","raw_affiliation_strings":["Nvidia Research, Santa Clara, CA, USA",", NVIDIA Research, Santa Clara, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nvidia Research, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":", NVIDIA Research, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113414416","display_name":"Jin Wang","orcid":"https://orcid.org/0000-0002-8298-4378"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jin Wang","raw_affiliation_strings":["School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034416611","display_name":"Srihari Cadambi","orcid":null},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srihari Cadambi","raw_affiliation_strings":["NEC Laboratories of America, Inc., Princeton, NJ, USA","NEC-Labs America, Princeton, NJ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NEC Laboratories of America, Inc., Princeton, NJ, USA","institution_ids":[]},{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111855694","display_name":"Sudhakar Yalamanchili","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sudhakar Yalamanchili","raw_affiliation_strings":["School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of ECE, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"[Sch. of ECE, Georgia Inst. of Technol., Atlanta, GA, USA]","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042424184","display_name":"Srimat Chakradhar","orcid":"https://orcid.org/0000-0003-3530-3901"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srimat Chakradhar","raw_affiliation_strings":["NEC Laboratories of America, Inc., Princeton, NJ, USA","NEC-Labs America, Princeton, NJ, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NEC Laboratories of America, Inc., Princeton, NJ, USA","institution_ids":[]},{"raw_affiliation_string":"NEC-Labs America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.1921,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.97729716,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2433","last_page":"2442"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8513448238372803},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6664125919342041},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6658397316932678},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.563667893409729},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5067611336708069},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.48926612734794617},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.4438049793243408},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.36389148235321045},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20468461513519287}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8513448238372803},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6664125919342041},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6658397316932678},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.563667893409729},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5067611336708069},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.48926612734794617},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.4438049793243408},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.36389148235321045},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20468461513519287},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdpsw.2012.300","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2012.300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops &amp; PhD Forum","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1967851095","https://openalex.org/W2004477204","https://openalex.org/W2009036829","https://openalex.org/W2023770077","https://openalex.org/W2030490908","https://openalex.org/W2045271686","https://openalex.org/W2050513283","https://openalex.org/W2054468361","https://openalex.org/W2068418796","https://openalex.org/W2068810256","https://openalex.org/W2078391824","https://openalex.org/W2081124914","https://openalex.org/W2100415730","https://openalex.org/W2104958979","https://openalex.org/W2109515201","https://openalex.org/W2127766448","https://openalex.org/W2128046183","https://openalex.org/W2136083615","https://openalex.org/W2138163628","https://openalex.org/W2257258878","https://openalex.org/W2283571372","https://openalex.org/W2406413350","https://openalex.org/W4251841159","https://openalex.org/W6642291734","https://openalex.org/W6713725621"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2076165488","https://openalex.org/W4385574943","https://openalex.org/W2769189194","https://openalex.org/W1537323515","https://openalex.org/W2353852602","https://openalex.org/W2120249721","https://openalex.org/W4293390906","https://openalex.org/W1548247965"],"abstract_inverted_index":{"Data":[0],"warehousing":[1],"applications":[2],"represent":[3],"an":[4],"emergent":[5],"application":[6],"arena":[7],"that":[8,31,46,153],"requires":[9],"the":[10,50,56,61,86,100,160,184,194,205,226,235],"processing":[11],"of":[12,20,52,71,103,142,179,186],"relational":[13,189],"queries":[14,230],"and":[15,59,63,94,122,132,135,139,156,162,169,204,229,232,245],"computations":[16,155],"over":[17],"massive":[18],"amounts":[19],"data.":[21],"Modern":[22],"general":[23],"purpose":[24],"GPUs":[25],"are":[26,43,200,207],"high":[27],"core":[28],"count":[29],"architectures":[30],"potentially":[32],"offer":[33],"substantial":[34],"improvements":[35,219],"in":[36,79,85,202,234],"throughput":[37,218],"for":[38,225,247],"these":[39,76],"applications.":[40],"However,":[41],"there":[42],"significant":[44],"challenges":[45],"arise":[47],"due":[48],"to":[49,74,107,176,223],"overheads":[51],"data":[53,117,127,157,217],"movement":[54,118,128],"through":[55],"memory":[57,131,143],"hierarchy":[58],"between":[60,119,129,159],"GPU":[62,105,120,123,130,161],"host":[64,163],"CPU.":[65],"This":[66],"paper":[67],"proposes":[68],"a":[69,148,177],"set":[70,178],"compiler":[72],"optimizations":[73,84],"address":[75],"challenges.":[77],"Inspired":[78],"part":[80],"by":[81],"loop":[82],"fusion/fission":[83,187],"scientific":[87],"computing":[88],"community,":[89],"we":[90,215],"propose":[91],"kernel":[92,95,149],"fusion":[93,98],"fission.":[96],"Kernel":[97,145],"fuses":[99],"code":[101],"bodies":[102],"two":[104],"kernels":[106,199],"i)":[108],"eliminate":[109],"redundant":[110],"operations":[111],"across":[112],"dependent":[113],"kernels,":[114],"ii)":[115],"reduce":[116,126],"registers":[121],"memory,":[124,134],"iii)":[125],"CPU":[133,164],"iv)":[136],"improve":[137],"spatial":[138],"temporal":[140],"locality":[141],"references.":[144],"fission":[146,170],"partitions":[147],"into":[150],"segments":[151],"such":[152],"segment":[154],"transfers":[158],"can":[165,171],"be":[166,173],"overlapped.":[167],"Fusion":[168],"also":[172],"applied":[174],"concurrently":[175],"kernels.":[180],"We":[181,239],"empirically":[182],"evaluate":[183],"benefits":[185],"on":[188],"algebra":[190],"operators":[191],"drawn":[192],"from":[193,221],"TPC-H":[195,236],"benchmark":[196,237],"suite.":[197,238],"All":[198],"implemented":[201],"CUDA":[203],"experiments":[206],"performed":[208],"with":[209],"NVIDIA":[210],"Fermi":[211],"GPUs.":[212],"In":[213],"general,":[214],"observed":[216],"ranging":[220],"13.1%":[222],"41.4%":[224],"SELECT":[227],"operator":[228],"Q1":[231],"Q21":[233],"present":[240],"key":[241],"insights,":[242],"lessons":[243],"learned,":[244],"opportunities":[246],"further":[248],"improvements.":[249]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
