{"id":"https://openalex.org/W6950187062","doi":"https://doi.org/10.5281/zenodo.4046174","title":"From Loop Fusion to Kernel Fusion: A Domain-specific Approach to Locality Optimization","display_name":"From Loop Fusion to Kernel Fusion: A Domain-specific Approach to Locality Optimization","publication_year":2019,"publication_date":"2019-02-16","ids":{"openalex":"https://openalex.org/W6950187062","doi":"https://doi.org/10.5281/zenodo.4046174"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:4046174","is_oa":true,"landing_page_url":"https://zenodo.org/record/4046174","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"other","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/4046174","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qiao, Bo","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Qiao, Bo","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Reiche, Oliver","orcid":"https://orcid.org/0000-0002-5125-4508"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Reiche, Oliver","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hannig, Frank","orcid":"https://orcid.org/0000-0003-3663-6484"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hannig, Frank","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":null,"display_name":"Teich, J\u00fcrgen","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Teich, J\u00fcrgen","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T13370","display_name":"Diverse Scientific and Economic Studies","score":0.035599999129772186,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13370","display_name":"Diverse Scientific and Economic Studies","score":0.035599999129772186,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11964","display_name":"Pregnancy and Medication Impact","score":0.021199999377131462,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13107","display_name":"Congenital limb and hand anomalies","score":0.013899999670684338,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.9409999847412109},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6409000158309937},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5142999887466431},{"id":"https://openalex.org/keywords/loop-fusion","display_name":"Loop fusion","score":0.44350001215934753},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4408999979496002},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4198000133037567},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.39629998803138733},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.3894999921321869},{"id":"https://openalex.org/keywords/loop-unrolling","display_name":"Loop unrolling","score":0.3856000006198883}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.9409999847412109},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.753000020980835},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7166000008583069},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6409000158309937},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5142999887466431},{"id":"https://openalex.org/C82653869","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fusion","level":3,"score":0.44350001215934753},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4408999979496002},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.39629998803138733},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3894999921321869},{"id":"https://openalex.org/C76970557","wikidata":"https://www.wikidata.org/wiki/Q1869750","display_name":"Loop unrolling","level":3,"score":0.3856000006198883},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3625999987125397},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.35569998621940613},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2937000095844269},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.29269999265670776},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2752000093460083},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.2354000061750412},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2328999936580658},{"id":"https://openalex.org/C134718785","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fission","level":3,"score":0.22910000383853912},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.22769999504089355},{"id":"https://openalex.org/C153247305","wikidata":"https://www.wikidata.org/wiki/Q835713","display_name":"Memory address","level":3,"score":0.22100000083446503},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.21649999916553497},{"id":"https://openalex.org/C178693496","wikidata":"https://www.wikidata.org/wiki/Q911691","display_name":"Clock rate","level":3,"score":0.20250000059604645},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.2021999955177307},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.20020000636577606},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.19930000603199005},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.19840000569820404},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1949000060558319},{"id":"https://openalex.org/C139571649","wikidata":"https://www.wikidata.org/wiki/Q1156793","display_name":"Program optimization","level":3,"score":0.19370000064373016},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.18940000236034393},{"id":"https://openalex.org/C2780365336","wikidata":"https://www.wikidata.org/wiki/Q25047934","display_name":"Single-core","level":2,"score":0.17479999363422394},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.17470000684261322},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.17170000076293945},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.17100000381469727},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.1703999936580658},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.1695999950170517}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:4046174","is_oa":true,"landing_page_url":"https://zenodo.org/record/4046174","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.4046174","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.4046174","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:4046174","is_oa":true,"landing_page_url":"https://zenodo.org/record/4046174","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[{"score":0.4048110544681549,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"artifact":[1],"describes":[2],"the":[3,7,10,32,65,130,141,191],"steps":[4],"to":[5,38,44,157],"reproduce":[6],"results":[8],"for":[9,168,186],"CUDA":[11,49,73,93,112,180],"code":[12],"generation":[13],"with":[14,75,95,114,152],"kernel":[15],"fusion":[16],"in":[17,27,31,61,64,190],"Hipacc":[18],"(an":[19],"image":[20],"processing":[21],"DSL":[22],"and":[23,82,102,121,166],"source-to-source":[24],"compiler":[25],"embedded":[26],"C++),":[28],"as":[29,59],"presented":[30],"CGO19":[33],"paper":[34],"\"From":[35],"Loop":[36],"Fusion":[37],"Kernel":[39],"Fusion:":[40],"A":[41],"Domain-specific":[42],"Approach":[43],"Locality":[45],"Optimization\".":[46],"Hardware":[47],"Dependencies:":[48,162],"enabled":[50],"GPUs":[51,151],"are":[52,155],"required.":[53],"We":[54],"used":[55],"three":[56,128],"Nvidia":[57,179],"cards,":[58],"discussed":[60],"Section":[62],"5.1":[63],"paper:":[66],"(a)":[67],"Geforce":[68,88],"GTX":[69,89],"745":[70],"facilitates":[71],"384":[72],"cores":[74,94,113],"a":[76,96,115],"base":[77,97,116],"clock":[78,98,117],"of":[79,99,118,133,144],"1,033":[80],"MHz":[81,84,101,104,120,123],"900":[83],"memory":[85,105,124,135],"clock.":[86,106,125],"(b)":[87],"680":[90],"has":[91,110],"1,536":[92],"1,058":[100],"3,004":[103],"(c)":[107],"Tesla":[108],"K20c":[109],"2,496":[111],"706":[119],"2,600":[122],"For":[126],"all":[127],"GPUs,":[129],"total":[131,142],"amount":[132],"shared":[134],"per":[136,147],"block":[137,148],"is":[138,149],"48":[139],"Kbytes,":[140],"number":[143],"registers":[145],"available":[146],"65,536.":[150],"similar":[153],"configurations":[154],"expected":[156],"generate":[158],"comparable":[159],"results.":[160],"Software":[161],"Clang/LLVM":[163],"(8.0),":[164],"compiler_rt":[165],"libcxx":[167],"Linux":[169],"(8.0).":[170],"CMake":[171],"(3.4":[172],"or":[173,177,183],"later),":[174],"Git":[175],"(2.7":[176],"later).":[178,184],"Driver":[181],"(10.0":[182],"OpenCV":[185],"producing":[187],"visual":[188],"output":[189],"samples.":[192]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
