{"id":"https://openalex.org/W6893357614","doi":"https://doi.org/10.5281/zenodo.2240193","title":"From Loop Fusion to Kernel Fusion: A Domain-specific Approach to Locality Optimization","display_name":"From Loop Fusion to Kernel Fusion: A Domain-specific Approach to Locality Optimization","publication_year":2019,"publication_date":"2019-02-16","ids":{"openalex":"https://openalex.org/W6893357614","doi":"https://doi.org/10.5281/zenodo.2240193"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:2240193","is_oa":true,"landing_page_url":"https://zenodo.org/record/2240193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"other","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/2240193","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Qiao, Bo","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Qiao, Bo","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Reiche, Oliver","orcid":"https://orcid.org/0000-0002-5125-4508"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Reiche, Oliver","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hannig, Frank","orcid":"https://orcid.org/0000-0003-3663-6484"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hannig, Frank","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":null,"display_name":"Teich, J\u00fcrgen","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Teich, J\u00fcrgen","raw_affiliation_strings":["Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander University Erlangen-N\u00fcrnberg (FAU)","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11192","display_name":"Underwater Vehicles and Communication Systems","score":0.9067000150680542,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11192","display_name":"Underwater Vehicles and Communication Systems","score":0.9067000150680542,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11698","display_name":"Underwater Acoustics Research","score":0.02070000022649765,"subfield":{"id":"https://openalex.org/subfields/1910","display_name":"Oceanography"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.9458000063896179},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5094000101089478},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4165000021457672},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.40450000762939453},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4016000032424927},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.39730000495910645},{"id":"https://openalex.org/keywords/loop-unrolling","display_name":"Loop unrolling","score":0.3799000084400177},{"id":"https://openalex.org/keywords/loop-fusion","display_name":"Loop fusion","score":0.3723999857902527}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.9458000063896179},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7807999849319458},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6884999871253967},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5094000101089478},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4165000021457672},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4016000032424927},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.39730000495910645},{"id":"https://openalex.org/C76970557","wikidata":"https://www.wikidata.org/wiki/Q1869750","display_name":"Loop unrolling","level":3,"score":0.3799000084400177},{"id":"https://openalex.org/C82653869","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fusion","level":3,"score":0.3723999857902527},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34700000286102295},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.27880001068115234},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C153247305","wikidata":"https://www.wikidata.org/wiki/Q835713","display_name":"Memory address","level":3,"score":0.2581999897956848},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25450000166893005}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:2240193","is_oa":true,"landing_page_url":"https://zenodo.org/record/2240193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.2240193","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.2240193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:2240193","is_oa":true,"landing_page_url":"https://zenodo.org/record/2240193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"artifact":[1],"describes":[2],"the":[3,7,10,32,49,55,60,82,86,94,114,179,190,240],"steps":[4],"to":[5,38,44,58,80,206],"reproduce":[6],"results":[8],"for":[9,217,235],"CUDA":[11,71,98,122,142,161,229],"code":[12,57],"generation":[13],"with":[14,70,124,144,163,201],"kernel":[15],"fusion":[16],"in":[17,27,31,90,93,110,113,239],"Hipacc":[18],"(an":[19],"image":[20],"processing":[21],"DSL":[22],"and":[23,84,131,151,170,215],"source-to-source":[24],"compiler":[25],"embedded":[26],"C++),":[28],"as":[29,52,54,88,108],"presented":[30],"CGO19":[33],"paper":[34],"\"From":[35],"Loop":[36],"Fusion":[37],"Kernel":[39],"Fusion:":[40],"A":[41],"Domain-specific":[42],"Approach":[43],"Locality":[45],"Optimization\".":[46],"We":[47,103],"provide":[48],"original":[50],"binaries":[51],"well":[53],"source":[56],"regenerate":[59],"binaries,":[61],"which":[62],"can":[63],"be":[64],"executed":[65],"on":[66],"x86_64":[67],"Linux":[68,218],"system":[69],"enabled":[72,99],"GPUs.":[73],"Furthermore,":[74],"we":[75],"include":[76],"two":[77],"python":[78],"scripts":[79],"run":[81],"application":[83],"compute":[85],"statistics":[87],"depicted":[89],"Figure":[91],"6":[92],"paper.":[95],"Hardware":[96],"Dependencies:":[97,211],"GPUs":[100,200],"are":[101,204],"required.":[102],"used":[104],"three":[105,177],"Nvidia":[106,228],"cards,":[107],"discussed":[109],"Section":[111],"5.1":[112],"paper:":[115],"(a)":[116],"Geforce":[117,137],"GTX":[118,138],"745":[119],"facilitates":[120],"384":[121],"cores":[123,143,162],"a":[125,145,164],"base":[126,146,165],"clock":[127,147,166],"of":[128,148,167,182,193],"1,033":[129],"MHz":[130,133,150,153,169,172],"900":[132],"memory":[134,154,173,184],"clock.":[135,155,174],"(b)":[136],"680":[139],"has":[140,159],"1,536":[141],"1,058":[149],"3,004":[152],"(c)":[156],"Tesla":[157],"K20c":[158],"2,496":[160],"706":[168],"2,600":[171],"For":[175],"all":[176],"GPUs,":[178],"total":[180,191],"amount":[181],"shared":[183],"per":[185,196],"block":[186,197],"is":[187,198],"48":[188],"Kbytes,":[189],"number":[192],"registers":[194],"available":[195],"65,536.":[199],"similar":[202],"configurations":[203],"expected":[205],"generate":[207],"comparable":[208],"results.":[209],"Software":[210],"Clang/LLVM":[212],"(6.0),":[213],"compiler_rt":[214],"libcxx":[216],"(6.0).":[219],"CMake":[220],"(3.4":[221],"or":[222,226,232],"later),":[223],"Git":[224],"(2.7":[225],"later).":[227,233],"Driver":[230],"(9.0":[231],"OpenCV":[234],"producing":[236],"visual":[237],"output":[238],"samples.":[241]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
