{"id":"https://openalex.org/W2102394168","doi":"https://doi.org/10.1145/2716282.2716284","title":"Helium: a transparent inter-kernel optimizer for OpenCL","display_name":"Helium: a transparent inter-kernel optimizer for OpenCL","publication_year":2015,"publication_date":"2015-02-03","ids":{"openalex":"https://openalex.org/W2102394168","doi":"https://doi.org/10.1145/2716282.2716284","mag":"2102394168"},"language":"en","primary_location":{"id":"doi:10.1145/2716282.2716284","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2716282.2716284","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Workshop on General Purpose Processing using GPUs","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000511414","display_name":"Thibaut Lutz","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Thibaut Lutz","raw_affiliation_strings":["University of Edinburgh, UK","University of Edinburgh (UK)"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]},{"raw_affiliation_string":"University of Edinburgh (UK)","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012290201","display_name":"Christian Fensch","orcid":null},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christian Fensch","raw_affiliation_strings":["Heriot-Watt University, UK","Heriot Watt University, UK#TAB#"],"affiliations":[{"raw_affiliation_string":"Heriot-Watt University, UK","institution_ids":["https://openalex.org/I32062511"]},{"raw_affiliation_string":"Heriot Watt University, UK#TAB#","institution_ids":["https://openalex.org/I32062511"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032376930","display_name":"Murray Cole","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Murray Cole","raw_affiliation_strings":["University of Edinburgh, UK","University of Edinburgh (UK)"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]},{"raw_affiliation_string":"University of Edinburgh (UK)","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5000511414"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":2.2609,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.88005767,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"70","last_page":"80"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9574999809265137,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8592207431793213},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7116949558258057},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5865941047668457},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5785567760467529},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.524431049823761},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5140432119369507},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5100489258766174},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.35402733087539673},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2565659284591675}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8592207431793213},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7116949558258057},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5865941047668457},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5785567760467529},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.524431049823761},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5140432119369507},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5100489258766174},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.35402733087539673},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2565659284591675},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2716282.2716284","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2716282.2716284","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Workshop on General Purpose Processing using GPUs","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lutz, T, Fensch, C & Cole, M 2015, Helium: a transparent inter-kernel optimizer for OpenCL. in GPGPU 2015 Proceedings of the 8th Workshop on General Purpose Processing using GPUs. pp. 70-80. https://doi.org/10.1145/2716282.2716284","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","is_oa":false,"landing_page_url":"https://www.research.ed.ac.uk/portal/en/publications/helium-a-transparent-interkernel-optimizer-for-opencl(bbe048d0-7d8e-4167-9ec2-42e46094f8d9).html","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/bbe048d0-7d8e-4167-9ec2-42e46094f8d9","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lutz, T, Fensch, C & Cole, M 2015, Helium: a transparent inter-kernel optimizer for OpenCL. in GPGPU 2015 Proceedings of the 8th Workshop on General Purpose Processing using GPUs. pp. 70-80. https://doi.org/10.1145/2716282.2716284","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1533528239","https://openalex.org/W1989562524","https://openalex.org/W1992851788","https://openalex.org/W2007920703","https://openalex.org/W2032039029","https://openalex.org/W2055312318","https://openalex.org/W2088840230","https://openalex.org/W2128329055","https://openalex.org/W2134408405","https://openalex.org/W2162530779","https://openalex.org/W2164106630","https://openalex.org/W2293879958","https://openalex.org/W2561675875","https://openalex.org/W4235762625","https://openalex.org/W4235832836","https://openalex.org/W4246430693"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W17155033","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W4386541577","https://openalex.org/W3081644756"],"abstract_inverted_index":{"State":[0],"of":[1,6,14,38,60,179,192,203],"the":[2,12,36,148,159,175],"art":[3],"automatic":[4],"optimization":[5,23],"OpenCL":[7,91],"applications":[8,26,53],"focuses":[9],"on":[10,139,147],"improving":[11],"performance":[13],"individual":[15],"compute":[16,180],"kernels.":[17],"Programmers":[18],"address":[19],"opportunities":[20,72,98],"for":[21,52,99],"inter-kernel":[22],"in":[24,114],"specific":[25],"by":[27],"ad-hoc":[28],"hand":[29,206],"tuning:":[30],"manually":[31],"fusing":[32],"kernels":[33],"together.":[34],"However,":[35],"complexity":[37],"interactions":[39],"between":[40],"host":[41],"and":[42,96,109,122,161,199],"kernel":[43,61,116,181],"code":[44,84,149,173,198],"makes":[45],"this":[46],"approach":[47],"weak":[48],"or":[49,63],"even":[50,163],"unviable":[51],"involving":[54],"more":[55],"than":[56,166],"a":[57,64,89,111],"small":[58],"number":[59],"invocations":[62],"highly":[65],"dynamic":[66,177],"control":[67],"flow,":[68],"leaving":[69],"substantial":[70],"potential":[71],"unexplored.":[73],"It":[74],"also":[75],"leads":[76],"to":[77,82,126,134,194],"an":[78,127,200],"over":[79,196,205],"complex,":[80],"hard":[81],"maintain":[83],"base.":[85,150],"We":[86],"present":[87],"Helium,":[88],"transparent":[90],"overlay":[92],"which":[93,115],"discovers,":[94],"manipulates":[95],"exploits":[97],"inter-and":[100],"intra-kernel":[101],"optimization.":[102],"Helium":[103,155,170],"is":[104],"implemented":[105],"as":[106],"preloaded":[107],"library":[108],"uses":[110],"delay-optimize-replay":[112],"mechanism":[113],"calls":[117],"are":[118],"intercepted,":[119],"collectively":[120],"optimized,":[121],"then":[123],"executed":[124],"according":[125],"improved":[128],"execution":[129],"plan.":[130],"This":[131],"allows":[132],"us":[133],"benefit":[135],"from":[136],"composite":[137],"optimizations,":[138],"large,":[140],"dynamically":[141],"complex":[142],"applications,":[143],"with":[144],"no":[145],"impact":[146],"Our":[151],"results":[152],"show":[153],"that":[154],"obtains":[156],"at":[157],"least":[158],"same,":[160],"frequently":[162],"better":[164],"performance,":[165],"carefully":[167],"handtuned":[168],"code.":[169,208],"outperforms":[171],"hand-optimized":[172],"where":[174],"exact":[176],"composition":[178],"cannot":[182],"be":[183],"known":[184],"statically.":[185],"In":[186],"these":[187],"cases,":[188],"we":[189],"demonstrate":[190],"speedups":[191],"up":[193],"3x":[195],"unoptimized":[197],"average":[201],"speedup":[202],"1.4x":[204],"optimized":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
