{"id":"https://openalex.org/W4309467950","doi":"https://doi.org/10.1145/3571284","title":"User-driven Online Kernel Fusion for SYCL","display_name":"User-driven Online Kernel Fusion for SYCL","publication_year":2022,"publication_date":"2022-11-18","ids":{"openalex":"https://openalex.org/W4309467950","doi":"https://doi.org/10.1145/3571284"},"language":"en","primary_location":{"id":"doi:10.1145/3571284","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571284","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571284","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3571284","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014861968","display_name":"V\u00edctor P\u00e9rez","orcid":"https://orcid.org/0000-0002-3302-8339"},"institutions":[{"id":"https://openalex.org/I4210128426","display_name":"Codeplay (United Kingdom)","ror":"https://ror.org/03g46y557","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210128426"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"V\u00edctor P\u00e9rez","raw_affiliation_strings":["Codeplay Software Ltd., Scotland, UK"],"affiliations":[{"raw_affiliation_string":"Codeplay Software Ltd., Scotland, UK","institution_ids":["https://openalex.org/I4210128426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090242105","display_name":"Luk\u00e1\u0161 Sommer","orcid":"https://orcid.org/0000-0003-1918-3911"},"institutions":[{"id":"https://openalex.org/I4210128426","display_name":"Codeplay (United Kingdom)","ror":"https://ror.org/03g46y557","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210128426"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lukas Sommer","raw_affiliation_strings":["Codeplay Software Ltd., Scotland, UK"],"affiliations":[{"raw_affiliation_string":"Codeplay Software Ltd., Scotland, UK","institution_ids":["https://openalex.org/I4210128426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050988939","display_name":"Victor Lom\u00fcller","orcid":"https://orcid.org/0000-0003-1726-4662"},"institutions":[{"id":"https://openalex.org/I4210128426","display_name":"Codeplay (United Kingdom)","ror":"https://ror.org/03g46y557","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210128426"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Victor Lom\u00fcller","raw_affiliation_strings":["Codeplay Software Ltd., Scotland, UK"],"affiliations":[{"raw_affiliation_string":"Codeplay Software Ltd., Scotland, UK","institution_ids":["https://openalex.org/I4210128426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026881457","display_name":"Kumudha Narasimhan","orcid":"https://orcid.org/0000-0002-1142-3039"},"institutions":[{"id":"https://openalex.org/I4210128426","display_name":"Codeplay (United Kingdom)","ror":"https://ror.org/03g46y557","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210128426"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kumudha Narasimhan","raw_affiliation_strings":["Codeplay Software Ltd., Scotland, UK"],"affiliations":[{"raw_affiliation_string":"Codeplay Software Ltd., Scotland, UK","institution_ids":["https://openalex.org/I4210128426"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028660206","display_name":"Mehdi Goli","orcid":"https://orcid.org/0000-0002-3520-9598"},"institutions":[{"id":"https://openalex.org/I4210128426","display_name":"Codeplay (United Kingdom)","ror":"https://ror.org/03g46y557","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210128426"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mehdi Goli","raw_affiliation_strings":["Codeplay Software Ltd., Scotland, UK"],"affiliations":[{"raw_affiliation_string":"Codeplay Software Ltd., Scotland, UK","institution_ids":["https://openalex.org/I4210128426"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5014861968"],"corresponding_institution_ids":["https://openalex.org/I4210128426"],"apc_list":null,"apc_paid":null,"fwci":0.403,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.60095419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"20","issue":"2","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8802697658538818},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.6174488663673401},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5977170467376709},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5866162776947021},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5853347182273865},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5706285834312439},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.4393625259399414},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.411582887172699},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3417770564556122},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.32456815242767334},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24700415134429932}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8802697658538818},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.6174488663673401},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5977170467376709},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5866162776947021},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5853347182273865},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5706285834312439},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.4393625259399414},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.411582887172699},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3417770564556122},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.32456815242767334},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24700415134429932},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3571284","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571284","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571284","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3571284","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571284","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571284","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4309467950.pdf","grobid_xml":"https://content.openalex.org/works/W4309467950.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1922926746","https://openalex.org/W1984222112","https://openalex.org/W2023415862","https://openalex.org/W2194775991","https://openalex.org/W2612667261","https://openalex.org/W2798341898","https://openalex.org/W2925895608","https://openalex.org/W3018019719","https://openalex.org/W3039426601","https://openalex.org/W3088415669","https://openalex.org/W3105362906","https://openalex.org/W3120676137","https://openalex.org/W3135606491","https://openalex.org/W4220818654","https://openalex.org/W4232718373","https://openalex.org/W4236177945","https://openalex.org/W4244917406","https://openalex.org/W6687483927"],"related_works":["https://openalex.org/W2784803070","https://openalex.org/W4367156293","https://openalex.org/W2391167130","https://openalex.org/W2092071486","https://openalex.org/W4283067488","https://openalex.org/W2460246254","https://openalex.org/W1915975010","https://openalex.org/W2138204413","https://openalex.org/W3093563898","https://openalex.org/W4287636201"],"abstract_inverted_index":{"Heterogeneous":[0],"programming":[1],"models":[2],"are":[3,101,178,198,215,255],"becoming":[4],"increasingly":[5],"popular":[6],"to":[7,37,49,149,162,174,200,217,233,237,249,263],"support":[8],"the":[9,30,67,81,92,113,139,191,223,234,239,251,264,268,273,285],"ever-evolving":[10],"hardware":[11],"architectures,":[12],"especially":[13],"for":[14,185,258],"new":[15],"and":[16,55,136,164,204,283],"emerging":[17],"specialized":[18],"accelerators":[19],"optimizing":[20],"specific":[21,206],"tasks.":[22],"While":[23,58,176],"such":[24,181,194],"programs":[25],"provide":[26],"performance":[27,47,274,286],"portability":[28],"of":[29,51,84,128,142,193,208,225,241,276],"existing":[31],"applications":[32,60],"across":[33],"various":[34],"heterogeneous":[35],"architectures":[36],"some":[38],"extent,":[39],"short-running":[40,65,78],"device":[41],"kernels":[42,66,79,85,117,156,168],"can":[43,69,73,133,146],"affect":[44],"an":[45,87,125,231],"application":[46],"due":[48],"overheads":[50],"data":[52],"transfer,":[53],"synchronization,":[54],"kernel":[56,123,228,242,265],"launch.":[57],"in":[59,86,94,288],"with":[61,157,169],"one":[62],"or":[63,267],"two":[64],"overhead":[68,159,171],"be":[70,74,134],"negligible,":[71],"it":[72,90],"noticeable":[75],"when":[76],"these":[77],"dominate":[80],"overall":[82],"number":[83],"application,":[88],"as":[89,182,212],"is":[91,124],"case":[93],"graph-based":[95],"neural":[96,281],"network":[97],"models,":[98],"where":[99],"there":[100,177],"several":[102,116],"small":[103],"memory-bound":[104],"nodes":[105],"alongside":[106],"few":[107],"large":[108],"compute-bound":[109],"nodes.":[110],"To":[111],"reduce":[112],"overhead,":[114],"combining":[115],"into":[118],"a":[119,151,195,201,213,226],"single,":[120],"more":[121],"optimized":[122],"active":[126],"area":[127],"research.":[129],"However,":[130],"this":[131],"task":[132],"time-consuming":[135],"error-prone":[137],"given":[138],"huge":[140],"set":[141],"potential":[143],"combinations.":[144],"This":[145,220],"push":[147],"programmers":[148,248],"seek":[150],"tradeoff":[152],"between":[153],"(a)":[154],"task-specific":[155],"low":[158],"but":[160,172],"hard":[161,216],"maintain":[163],"(b)":[165],"smaller":[166],"modular":[167],"higher":[170],"easier":[173],"maintain.":[175],"DSL-based":[179],"approaches,":[180],"those":[183],"provided":[184],"machine":[186],"learning":[187],"frameworks,":[188],"which":[189],"offer":[190],"possibility":[192],"fusion,":[196],"they":[197],"limited":[199],"particular":[202],"domain":[203,210],"exploit":[205],"knowledge":[207],"that":[209,254],"and,":[211],"consequence,":[214],"port":[218],"elsewhere.":[219],"study":[221,284],"explores":[222],"feasibility":[224],"user-driven":[227],"fusion":[229,259],"through":[230],"extension":[232],"SYCL":[235],"API":[236],"address":[238],"automation":[240],"fusion.":[243],"The":[244],"proposed":[245],"solution":[246],"requires":[247],"define":[250],"subgraph":[252],"regions":[253],"potentially":[256],"suitable":[257],"without":[260],"any":[261],"modification":[262],"code":[266],"function":[269],"signature.":[270],"We":[271],"evaluate":[272],"benefit":[275],"our":[277],"approach":[278],"on":[279],"common":[280],"networks":[282],"improvement":[287],"detail.":[289]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
