{"id":"https://openalex.org/W4379919500","doi":"https://doi.org/10.1145/3589236.3589242","title":"Exploiting Scratchpad Memory for Deep Temporal Blocking","display_name":"Exploiting Scratchpad Memory for Deep Temporal Blocking","publication_year":2023,"publication_date":"2023-02-25","ids":{"openalex":"https://openalex.org/W4379919500","doi":"https://doi.org/10.1145/3589236.3589242"},"language":"en","primary_location":{"id":"doi:10.1145/3589236.3589242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589236.3589242","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Workshop on General Purpose Processing Using GPU","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.03336","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078250463","display_name":"Lingqi Zhang","orcid":"https://orcid.org/0000-0002-2452-1551"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Lingqi Zhang","raw_affiliation_strings":["Tokyo Institute of Technology, Japan and National Institute of Advanced Industrial Science and Technology, Japan"],"raw_orcid":"https://orcid.org/0000-0002-2452-1551","affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Japan and National Institute of Advanced Industrial Science and Technology, Japan","institution_ids":["https://openalex.org/I73613424","https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002208999","display_name":"Mohamed Wahib","orcid":"https://orcid.org/0000-0002-7165-2095"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mohamed Wahib","raw_affiliation_strings":["RIKEN Center for Computational Science, Japan"],"raw_orcid":"https://orcid.org/0000-0002-7165-2095","affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338479","display_name":"Peng Chen","orcid":"https://orcid.org/0000-0003-1244-3151"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Peng Chen","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology, Japan and RIKEN Center for Computational Science, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1244-3151","affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology, Japan and RIKEN Center for Computational Science, Japan","institution_ids":["https://openalex.org/I4210129730","https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056275783","display_name":"Jintao Meng","orcid":"https://orcid.org/0000-0002-6208-4102"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jintao Meng","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-6208-4102","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411431","display_name":"Xiao Wang","orcid":"https://orcid.org/0000-0001-6545-1943"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Wang","raw_affiliation_strings":["Oak Ridge National Laboratory, United States"],"raw_orcid":"https://orcid.org/0000-0001-6545-1943","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, United States","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011254074","display_name":"Toshio Endo","orcid":"https://orcid.org/0000-0001-7297-6211"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshio Endo","raw_affiliation_strings":["Tokyo Institute of Technology, Japan"],"raw_orcid":"https://orcid.org/0000-0001-7297-6211","affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100634486","display_name":"Satoshi Matsuoka","orcid":"https://orcid.org/0000-0003-1910-8532"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Matsuoka","raw_affiliation_strings":["RIKEN Center for Computational Science, Japan and Tokyo Institute of Technology, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1910-8532","affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Japan and Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I4210129730","https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5078250463"],"corresponding_institution_ids":["https://openalex.org/I114531698","https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06705273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"34","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8558429479598999},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.8488748669624329},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7600647211074829},{"id":"https://openalex.org/keywords/tile","display_name":"Tile","score":0.4627326726913452},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.421130508184433},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4201143980026245},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4168930947780609},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3851199150085449},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3255184590816498},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23952215909957886},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.16762661933898926}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8558429479598999},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.8488748669624329},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7600647211074829},{"id":"https://openalex.org/C2780728851","wikidata":"https://www.wikidata.org/wiki/Q468402","display_name":"Tile","level":2,"score":0.4627326726913452},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.421130508184433},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4201143980026245},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4168930947780609},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3851199150085449},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3255184590816498},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23952215909957886},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.16762661933898926},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3589236.3589242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589236.3589242","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Workshop on General Purpose Processing Using GPU","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2306.03336","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.03336","pdf_url":"https://arxiv.org/pdf/2306.03336","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:irdb.nii.ac.jp:00897:0005861214","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100893932","pdf_url":null,"source":{"id":"https://openalex.org/S7407056385","display_name":"Institutional Repositories DataBase (IRDB)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184597095","host_organization_name":"National Institute of Informatics","host_organization_lineage":["https://openalex.org/I184597095"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"proceedings of the 15th Workshop on General Purpose Processing Using GPU (GPGPU 2023)","raw_type":"conference paper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.03336","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.03336","pdf_url":"https://arxiv.org/pdf/2306.03336","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3818729332","display_name":null,"funder_award_id":"JPNP20006","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G6725152017","display_name":null,"funder_award_id":"JPNP20006","funder_id":"https://openalex.org/F4320321034","funder_display_name":"New Energy and Industrial Technology Development Organization"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320321034","display_name":"New Energy and Industrial Technology Development Organization","ror":"https://ror.org/0055k7a87"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320335125","display_name":"RIKEN","ror":"https://ror.org/01sjwvz98"},{"id":"https://openalex.org/F4320338287","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4379919500.pdf"},"referenced_works_count":3,"referenced_works":["https://openalex.org/W2889543163","https://openalex.org/W2996929894","https://openalex.org/W3099814709"],"related_works":["https://openalex.org/W3105129168","https://openalex.org/W2804920739","https://openalex.org/W2114959296","https://openalex.org/W4316371992","https://openalex.org/W2186216222","https://openalex.org/W2392765154","https://openalex.org/W2008005532","https://openalex.org/W1971603802","https://openalex.org/W641925902","https://openalex.org/W2149904643"],"abstract_inverted_index":{"General":[0],"Purpose":[1],"Graphics":[2],"Processing":[3],"Units":[4],"(GPGPU)":[5],"are":[6],"used":[7],"in":[8,14,29,80],"most":[9],"of":[10,19,49,69,145],"the":[11,30,46,55,67,78,87,103,117],"top":[12],"systems":[13],"HPC.":[15],"The":[16],"total":[17],"capacity":[18,48],"scratchpad":[20,50,71,100],"memory":[21,101],"has":[22],"increased":[23],"by":[24],"more":[25],"than":[26],"40":[27],"times":[28],"last":[31],"decade.":[32],"However,":[33],"existing":[34,74],"optimizations":[35],"for":[36],"stencil":[37,60],"computations":[38],"using":[39],"temporal":[40],"blocking":[41],"have":[42],"not":[43,142],"aggressively":[44],"exploited":[45],"large":[47,70,94],"memory.":[51,72,122],"This":[52],"work":[53],"uses":[54],"2D":[56],"Jacobian":[57],"5-point":[58],"iterative":[59],"as":[61],"a":[62,81,112],"case":[63],"study":[64],"to":[65,96,120,131],"investigate":[66],"use":[68],"Unlike":[73],"research":[75],"that":[76,90,126],"tiles":[77],"domain":[79,88],"thread":[82],"block":[83],"fashion,":[84],"we":[85,106],"tile":[86,92,114],"so":[89],"each":[91],"is":[93,129,137],"enough":[95],"utilize":[97],"all":[98],"available":[99],"on":[102],"GPU.":[104],"Consequently,":[105],"process":[107],"several":[108],"time":[109],"steps":[110],"inside":[111],"single":[113],"before":[115],"offloading":[116],"result":[118],"back":[119],"global":[121],"Our":[123],"evaluation":[124],"shows":[125],"our":[127,135],"performance":[128],"comparable":[130],"state-of-the-art":[132],"implementations,":[133],"yet":[134],"implementation":[136],"much":[138],"simpler":[139],"and":[140],"does":[141],"require":[143],"auto-generation":[144],"code.":[146]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-06-09T00:00:00"}
