{"id":"https://openalex.org/W2996929894","doi":"https://doi.org/10.1145/3368826.3377904","title":"AN5D: automated stencil framework for high-degree temporal blocking on GPUs","display_name":"AN5D: automated stencil framework for high-degree temporal blocking on GPUs","publication_year":2020,"publication_date":"2020-02-21","ids":{"openalex":"https://openalex.org/W2996929894","doi":"https://doi.org/10.1145/3368826.3377904","mag":"2996929894"},"language":"en","primary_location":{"id":"doi:10.1145/3368826.3377904","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377904","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377904","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377904","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049378976","display_name":"Matsumura Kazuaki","orcid":"https://orcid.org/0000-0001-5750-0995"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]},{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Kazuaki Matsumura","raw_affiliation_strings":["Barcelona Supercomputing Center, Spain"],"affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073830951","display_name":"Hamid Reza Zohouri","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112671","display_name":"Nagase ChemteX (Japan)","ror":"https://ror.org/025y41g72","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210112671"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hamid Reza Zohouri","raw_affiliation_strings":["Edgecortix, Japan"],"affiliations":[{"raw_affiliation_string":"Edgecortix, Japan","institution_ids":["https://openalex.org/I4210112671"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002208999","display_name":"Mohamed Wahib","orcid":"https://orcid.org/0000-0002-7165-2095"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mohamed Wahib","raw_affiliation_strings":["AIST, Japan"],"affiliations":[{"raw_affiliation_string":"AIST, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011254074","display_name":"Toshio Endo","orcid":"https://orcid.org/0000-0001-7297-6211"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshio Endo","raw_affiliation_strings":["Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103252086","display_name":"Satoshi Matsuoka","orcid":"https://orcid.org/0000-0003-2126-2926"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Satoshi Matsuoka","raw_affiliation_strings":["RIKEN CCS, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN CCS, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5049378976"],"corresponding_institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":8.2505,"has_fulltext":true,"cited_by_count":55,"citation_normalized_percentile":{"value":0.98181306,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"199","last_page":"211"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9711318016052246},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8493131399154663},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8059647083282471},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.7558621764183044},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6574604511260986},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6057509779930115},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.4811050295829773},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4699532985687256},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4583302438259125},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.42137929797172546},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.32658302783966064},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20661643147468567},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.17065748572349548}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9711318016052246},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8493131399154663},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8059647083282471},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.7558621764183044},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6574604511260986},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6057509779930115},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.4811050295829773},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4699532985687256},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4583302438259125},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.42137929797172546},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.32658302783966064},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20661643147468567},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.17065748572349548},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3368826.3377904","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377904","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377904","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2001.01473","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.01473","pdf_url":"https://arxiv.org/pdf/2001.01473","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3368826.3377904","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368826.3377904","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368826.3377904","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3282004645","display_name":null,"funder_award_id":"JPMJCR","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G3612518180","display_name":null,"funder_award_id":"JPMJCR19F5","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"}],"funders":[{"id":"https://openalex.org/F4320311508","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2996929894.pdf","grobid_xml":"https://content.openalex.org/works/W2996929894.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W57243869","https://openalex.org/W1558370006","https://openalex.org/W1966827540","https://openalex.org/W1973532523","https://openalex.org/W1979457157","https://openalex.org/W1987588924","https://openalex.org/W2002555321","https://openalex.org/W2003798513","https://openalex.org/W2022036382","https://openalex.org/W2039378765","https://openalex.org/W2056295840","https://openalex.org/W2063249715","https://openalex.org/W2063656563","https://openalex.org/W2077143534","https://openalex.org/W2122677725","https://openalex.org/W2129471558","https://openalex.org/W2133352531","https://openalex.org/W2154078685","https://openalex.org/W2166622045","https://openalex.org/W2183420951","https://openalex.org/W2234212075","https://openalex.org/W2265231147","https://openalex.org/W2296218291","https://openalex.org/W2296730406","https://openalex.org/W2311803642","https://openalex.org/W2315715336","https://openalex.org/W2528222261","https://openalex.org/W2528745529","https://openalex.org/W2561650451","https://openalex.org/W2579915854","https://openalex.org/W2605317797","https://openalex.org/W2730450744","https://openalex.org/W2772612468","https://openalex.org/W2786544209","https://openalex.org/W2788864020","https://openalex.org/W2887619884","https://openalex.org/W2889543163","https://openalex.org/W2899691047","https://openalex.org/W2936463352","https://openalex.org/W3100822741","https://openalex.org/W3105739589","https://openalex.org/W4232301837","https://openalex.org/W4235662649"],"related_works":["https://openalex.org/W2028798345","https://openalex.org/W1999524790","https://openalex.org/W1574397187","https://openalex.org/W3106055984","https://openalex.org/W2996929894","https://openalex.org/W4223480868","https://openalex.org/W2111914791","https://openalex.org/W2244508497","https://openalex.org/W2769189194","https://openalex.org/W4321636661"],"abstract_inverted_index":{"Stencil":[0],"computation":[1,31],"is":[2,65,75,100],"one":[3],"of":[4,27,30,53,59,77,130],"the":[5,24,51,54,134,146],"most":[6],"widely-used":[7],"compute":[8],"patterns":[9,83],"in":[10,84,97,116],"high":[11,63],"performance":[12,64,104,122,136],"computing":[13],"applications.":[14],"Spatial":[15],"and":[16,56,80,90,113],"temporal":[17,127],"blocking":[18,128],"have":[19],"been":[20],"proposed":[21],"to":[22,39,61,118,125],"overcome":[23],"memory-bound":[25],"nature":[26],"this":[28],"type":[29],"by":[32,102],"moving":[33],"memory":[34,38,41,57,112],"pressure":[35,115],"from":[36],"external":[37],"on-chip":[40],"on":[42,145],"GPUs.":[43],"However,":[44],"correctly":[45],"implementing":[46],"those":[47],"optimizations":[48],"while":[49],"considering":[50],"complexity":[52],"architecture":[55],"hierarchy":[58],"GPUs":[60],"achieve":[62,133],"difficult.":[66],"We":[67,132],"propose":[68],"AN5D,":[69],"an":[70],"automated":[71],"stencil":[72,82,143],"framework":[73,99],"which":[74],"capable":[76],"automatically":[78],"transforming":[79],"optimizing":[81],"a":[85,126],"given":[86],"C":[87],"source":[88],"code,":[89],"generating":[91],"corresponding":[92],"CUDA":[93],"code.":[94],"Parameter":[95],"tuning":[96],"our":[98,103],"guided":[101],"model.":[105],"Our":[106],"novel":[107],"optimization":[108],"strategy":[109],"reduces":[110],"shared":[111],"register":[114],"comparison":[117],"existing":[119],"implementations,":[120],"allowing":[121],"scaling":[123],"up":[124],"degree":[129],"10.":[131],"highest":[135],"reported":[137],"so":[138],"far":[139],"for":[140],"all":[141],"evaluated":[142],"benchmarks":[144],"state-of-the-art":[147],"Tesla":[148],"V100":[149],"GPU.":[150]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2020-01-10T00:00:00"}
