{"id":"https://openalex.org/W3172685698","doi":"https://doi.org/10.1145/3447818.3461665","title":"Inter-loop optimization in RAJA using loop chains","display_name":"Inter-loop optimization in RAJA using loop chains","publication_year":2021,"publication_date":"2021-06-03","ids":{"openalex":"https://openalex.org/W3172685698","doi":"https://doi.org/10.1145/3447818.3461665","mag":"3172685698"},"language":"en","primary_location":{"id":"doi:10.1145/3447818.3461665","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447818.3461665","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447818.3461665","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3447818.3461665","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019625135","display_name":"Brandon Neth","orcid":null},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Brandon Neth","raw_affiliation_strings":["University of Arizona"],"affiliations":[{"raw_affiliation_string":"University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083906258","display_name":"Tom Scogland","orcid":"https://orcid.org/0000-0001-7234-5743"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas R. W. Scogland","raw_affiliation_strings":["Lawrence Livermore National Laboratory"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058719424","display_name":"Bronis R. de Supinski","orcid":"https://orcid.org/0000-0002-0339-1006"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bronis R. de Supinski","raw_affiliation_strings":["Lawrence Livermore National Laboratory"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022191670","display_name":"Michelle Mills Strout","orcid":"https://orcid.org/0000-0001-7646-4766"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michelle Mills Strout","raw_affiliation_strings":["University of Arizona"],"affiliations":[{"raw_affiliation_string":"University of Arizona","institution_ids":["https://openalex.org/I138006243"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5019625135"],"corresponding_institution_ids":["https://openalex.org/I138006243"],"apc_list":null,"apc_paid":null,"fwci":0.2303,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.46670874,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.8756954669952393},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8520635366439819},{"id":"https://openalex.org/keywords/loop-fusion","display_name":"Loop fusion","score":0.8389203548431396},{"id":"https://openalex.org/keywords/loop-fission","display_name":"Loop fission","score":0.7629070281982422},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6986227035522461},{"id":"https://openalex.org/keywords/loop","display_name":"Loop (graph theory)","score":0.6109613180160522},{"id":"https://openalex.org/keywords/raja","display_name":"Raja","score":0.6106153726577759},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5397087335586548},{"id":"https://openalex.org/keywords/nested-loop-join","display_name":"Nested loop join","score":0.5078228116035461},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.507820188999176},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.490984708070755},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4511246085166931},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.4407809376716614},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.43191972374916077},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.4152795076370239},{"id":"https://openalex.org/keywords/inner-loop","display_name":"Inner loop","score":0.4134734570980072},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.40023601055145264},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2366030514240265},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.10525405406951904},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.10518074035644531}],"concepts":[{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.8756954669952393},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8520635366439819},{"id":"https://openalex.org/C82653869","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fusion","level":3,"score":0.8389203548431396},{"id":"https://openalex.org/C134718785","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fission","level":3,"score":0.7629070281982422},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6986227035522461},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.6109613180160522},{"id":"https://openalex.org/C2779171504","wikidata":"https://www.wikidata.org/wiki/Q1834465","display_name":"Raja","level":2,"score":0.6106153726577759},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5397087335586548},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.5078228116035461},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.507820188999176},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.490984708070755},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4511246085166931},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.4407809376716614},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.43191972374916077},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.4152795076370239},{"id":"https://openalex.org/C58716799","wikidata":"https://www.wikidata.org/wiki/Q6035648","display_name":"Inner loop","level":3,"score":0.4134734570980072},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.40023601055145264},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2366030514240265},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.10525405406951904},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.10518074035644531},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3447818.3461665","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447818.3461665","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447818.3461665","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3447818.3461665","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447818.3461665","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447818.3461665","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3172685698.pdf","grobid_xml":"https://content.openalex.org/works/W3172685698.grobid-xml"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W32775768","https://openalex.org/W152682494","https://openalex.org/W1507659254","https://openalex.org/W1554991828","https://openalex.org/W1979457157","https://openalex.org/W1984113567","https://openalex.org/W1987210269","https://openalex.org/W1989988531","https://openalex.org/W2003798513","https://openalex.org/W2023415862","https://openalex.org/W2032032242","https://openalex.org/W2034761517","https://openalex.org/W2048968833","https://openalex.org/W2050297469","https://openalex.org/W2055312318","https://openalex.org/W2078794610","https://openalex.org/W2084917734","https://openalex.org/W2124438843","https://openalex.org/W2143609451","https://openalex.org/W2147578933","https://openalex.org/W2156841718","https://openalex.org/W2158626113","https://openalex.org/W2160106616","https://openalex.org/W2166906890","https://openalex.org/W2167713801","https://openalex.org/W2168412050","https://openalex.org/W2412412865","https://openalex.org/W2471164860","https://openalex.org/W2506485539","https://openalex.org/W2521708680","https://openalex.org/W2556804450","https://openalex.org/W2561650451","https://openalex.org/W2562674124","https://openalex.org/W2772612468","https://openalex.org/W2786320458","https://openalex.org/W2788111881","https://openalex.org/W2804032941","https://openalex.org/W2804500013","https://openalex.org/W2810610794","https://openalex.org/W2875630249","https://openalex.org/W2889543163","https://openalex.org/W2903365351","https://openalex.org/W2912579397","https://openalex.org/W2949251082","https://openalex.org/W2949967139","https://openalex.org/W2961619211","https://openalex.org/W2963179825","https://openalex.org/W2996372729","https://openalex.org/W2996807164","https://openalex.org/W3082563116","https://openalex.org/W3102385154","https://openalex.org/W4232301837","https://openalex.org/W4239600432","https://openalex.org/W4241799649","https://openalex.org/W4285719527","https://openalex.org/W4288083528","https://openalex.org/W6736295056","https://openalex.org/W6752057402","https://openalex.org/W6843173220"],"related_works":["https://openalex.org/W2342605656","https://openalex.org/W2040522199","https://openalex.org/W154155438","https://openalex.org/W1596526459","https://openalex.org/W3019964433","https://openalex.org/W2148631234","https://openalex.org/W2017719803","https://openalex.org/W2481821631","https://openalex.org/W4205359103","https://openalex.org/W2161989345"],"abstract_inverted_index":{"Typical":[0],"parallelization":[1],"approaches":[2,27],"such":[3],"as":[4],"OpenMP":[5],"and":[6,12,51,92,112,127],"CUDA":[7],"provide":[8],"constructs":[9],"for":[10,14,17,48,70],"parallelizing":[11],"blocking":[13],"data":[15,33,39,114],"locality":[16,34],"individual":[18],"loops.":[19],"By":[20,103],"focusing":[21],"on":[22],"each":[23],"loop":[24,42,61,87,90,120],"separately,":[25],"these":[26],"fail":[28],"to":[29,37,118],"leverage":[30],"sources":[31],"of":[32,100,130,138,142],"possible":[35],"due":[36],"inter-loop":[38,53],"reuse.":[40],"The":[41],"chain":[43,62],"abstraction":[44,63],"provides":[45],"a":[46,66],"framework":[47],"reasoning":[49],"about":[50],"applying":[52],"optimizations.":[54],"In":[55],"this":[56],"work,":[57],"we":[58,109],"incorporate":[59],"the":[60,75,83,97,124,139],"into":[64],"RAJA,":[65,77],"performance":[67,125,140],"portability":[68],"layer":[69],"high-performance":[71],"computing":[72],"applications.":[73],"Using":[74],"loop-chain-extended":[76],"or":[78],"RAJALC,":[79],"developers":[80],"can":[81,110],"have":[82],"RAJA":[84],"library":[85],"apply":[86],"transformations":[88],"like":[89],"fusion":[91],"overlapped":[93],"tiling":[94],"while":[95],"maintaining":[96],"original":[98],"structure":[99],"their":[101],"programs.":[102],"introducing":[104],"targeted":[105],"symbolic":[106],"execution":[107],"capabilities,":[108],"collect":[111],"cache":[113],"access":[115],"information":[116],"required":[117],"verify":[119],"transformations.":[121],"We":[122],"evaluate":[123],"improvement":[126],"refactoring":[128],"costs":[129],"our":[131,134],"extension.":[132],"Overall,":[133],"results":[135],"demonstrate":[136],"85-98\\%":[137],"improvements":[141],"hand-optimized":[143],"kernels":[144],"with":[145],"dramatically":[146],"fewer":[147],"code":[148],"changes.":[149]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
