{"id":"https://openalex.org/W4367684987","doi":"https://doi.org/10.1145/3577193.3593705","title":"PERKS: a Locality-Optimized Execution Model for Iterative Memory-bound GPU Applications","display_name":"PERKS: a Locality-Optimized Execution Model for Iterative Memory-bound GPU Applications","publication_year":2023,"publication_date":"2023-06-20","ids":{"openalex":"https://openalex.org/W4367684987","doi":"https://doi.org/10.1145/3577193.3593705"},"language":"en","primary_location":{"id":"doi:10.1145/3577193.3593705","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3577193.3593705","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/servlets/purl/1994672","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078250463","display_name":"Lingqi Zhang","orcid":"https://orcid.org/0000-0002-2452-1551"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Lingqi Zhang","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology, Tokyo, Japan","Tokyo Institute of Technology, Tokyo, Japan","Tokyo Institute of Technology, Tokyo, Japan National Institute of Advanced Industrial Science and Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan National Institute of Advanced Industrial Science and Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I73613424","https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002208999","display_name":"Mohamed Wahib","orcid":"https://orcid.org/0000-0002-7165-2095"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mohamed Wahib","raw_affiliation_strings":["RIKEN Center for Computational Science, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Tokyo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338479","display_name":"Peng Chen","orcid":"https://orcid.org/0000-0003-1244-3151"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Peng Chen","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology, Tokyo, Japan","RIKEN Center for Computational Science, Tokyo, Japan","National Institute of Advanced Industrial Science and Technology, Tokyo, Japan RIKEN Center for Computational Science, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"RIKEN Center for Computational Science, Tokyo, Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology, Tokyo, Japan RIKEN Center for Computational Science, Tokyo, Japan","institution_ids":["https://openalex.org/I4210129730","https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056275783","display_name":"Jintao Meng","orcid":"https://orcid.org/0000-0002-6208-4102"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jintao Meng","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411431","display_name":"Xiao Wang","orcid":"https://orcid.org/0000-0001-6545-1943"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]},{"id":"https://openalex.org/I1309980932","display_name":"National Transportation Research Center","ror":"https://ror.org/011fc0n53","country_code":"US","type":"government","lineage":["https://openalex.org/I1309980932"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Wang","raw_affiliation_strings":["Oak Ridge National Laboratory, Knoxville, United States of America"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Knoxville, United States of America","institution_ids":["https://openalex.org/I1309980932","https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011254074","display_name":"Toshio Endo","orcid":"https://orcid.org/0000-0001-7297-6211"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshio Endo","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100634486","display_name":"Satoshi Matsuoka","orcid":"https://orcid.org/0000-0003-1910-8532"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Matsuoka","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Japan","Tokyo Institute of Technology, Tokyo, Japan","RIKEN Center for Computational Science, Kobe, Japan Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Japan Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I4210129730","https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5078250463"],"corresponding_institution_ids":["https://openalex.org/I114531698","https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":3.305,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.92506853,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"167","last_page":"179"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8261616230010986},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8227839469909668},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8038514852523804},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.677832305431366},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.6323919296264648},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5893378853797913},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.44975796341896057},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.4380101263523102},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.4155261516571045},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.28055208921432495},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2752233147621155},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10287928581237793}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8261616230010986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8227839469909668},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8038514852523804},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.677832305431366},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.6323919296264648},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5893378853797913},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.44975796341896057},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.4380101263523102},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.4155261516571045},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.28055208921432495},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2752233147621155},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10287928581237793},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3577193.3593705","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3577193.3593705","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th International Conference on Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:1994672","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1994672","pdf_url":"https://www.osti.gov/servlets/purl/1994672","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1994672","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1994672","pdf_url":"https://www.osti.gov/servlets/purl/1994672","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1036901904","display_name":"\u6570\u5b66\u6559\u80b2\u306e\u6620\u50cf\u5316\u3068\u6559\u5177\u306e\u958b\u767a","funder_award_id":"20006","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G1645119126","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1719536385","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1769477490","display_name":"Large-scale Tomography Computation","funder_award_id":"21K17750","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G2042897603","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G2296932962","display_name":null,"funder_award_id":"DE-AC05-00OR227","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3291250701","display_name":null,"funder_award_id":"KAKENHI","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G3299391273","display_name":null,"funder_award_id":"E-AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3818729332","display_name":null,"funder_award_id":"JPNP20006","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3912533391","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4064776908","display_name":null,"funder_award_id":"JPMJPR20MA","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G4170161455","display_name":null,"funder_award_id":"PRESTO","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5518604981","display_name":null,"funder_award_id":"PREST","funder_id":"https://openalex.org/F4320338111","funder_display_name":"Precursory Research for Embryonic Science and Technology"},{"id":"https://openalex.org/G5726405315","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G6129992089","display_name":null,"funder_award_id":"DE-AC05-","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G6864165199","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G691578896","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7314194600","display_name":null,"funder_award_id":"RIKEN","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7340972926","display_name":null,"funder_award_id":"other","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7752643416","display_name":null,"funder_award_id":"Japan","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7995982022","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8414908677","display_name":null,"funder_award_id":"DE-AC0","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8430481527","display_name":null,"funder_award_id":"Number","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8799952057","display_name":null,"funder_award_id":"DE-AC05-00OR22","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8813984943","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G8906985441","display_name":null,"funder_award_id":"00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8943143067","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320306250","display_name":"Battelle","ror":"https://ror.org/01h5tnr73"},{"id":"https://openalex.org/F4320316892","display_name":"UT-Battelle","ror":"https://ror.org/04nza6677"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320335125","display_name":"RIKEN","ror":"https://ror.org/01sjwvz98"},{"id":"https://openalex.org/F4320338111","display_name":"Precursory Research for Embryonic Science and Technology","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4367684987.pdf"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W192356505","https://openalex.org/W1506424797","https://openalex.org/W1540417193","https://openalex.org/W1987588924","https://openalex.org/W1988579562","https://openalex.org/W1995438745","https://openalex.org/W1997597015","https://openalex.org/W2003798513","https://openalex.org/W2023415862","https://openalex.org/W2028469001","https://openalex.org/W2049009664","https://openalex.org/W2053999255","https://openalex.org/W2063249715","https://openalex.org/W2063656563","https://openalex.org/W2067313328","https://openalex.org/W2077143534","https://openalex.org/W2107725926","https://openalex.org/W2109341366","https://openalex.org/W2110195531","https://openalex.org/W2113190809","https://openalex.org/W2134427337","https://openalex.org/W2136434791","https://openalex.org/W2150319905","https://openalex.org/W2154078685","https://openalex.org/W2169076304","https://openalex.org/W2169150754","https://openalex.org/W2234212075","https://openalex.org/W2244508497","https://openalex.org/W2296218291","https://openalex.org/W2296730406","https://openalex.org/W2302931088","https://openalex.org/W2315715336","https://openalex.org/W2402285027","https://openalex.org/W2528222261","https://openalex.org/W2552810306","https://openalex.org/W2559809597","https://openalex.org/W2617808919","https://openalex.org/W2772612468","https://openalex.org/W2792190530","https://openalex.org/W2887327791","https://openalex.org/W2889543163","https://openalex.org/W2904283553","https://openalex.org/W2936463352","https://openalex.org/W2971663118","https://openalex.org/W2983655274","https://openalex.org/W2984920043","https://openalex.org/W2996929894","https://openalex.org/W3021182071","https://openalex.org/W3038558128","https://openalex.org/W3043110088","https://openalex.org/W3046862414","https://openalex.org/W3047012757","https://openalex.org/W3099448935","https://openalex.org/W3099814709","https://openalex.org/W3104044273","https://openalex.org/W3125884253","https://openalex.org/W3130639051","https://openalex.org/W3131967960","https://openalex.org/W3156865694","https://openalex.org/W3209636422","https://openalex.org/W4236713805"],"related_works":["https://openalex.org/W2983282793","https://openalex.org/W1507301366","https://openalex.org/W2269110805","https://openalex.org/W4297942731","https://openalex.org/W3105739589","https://openalex.org/W3205838256","https://openalex.org/W2896552114","https://openalex.org/W2607998022","https://openalex.org/W2943610686","https://openalex.org/W2097757554"],"abstract_inverted_index":{"Iterative":[0],"memory-bound":[1,54],"solvers":[2],"commonly":[3],"occur":[4],"in":[5,93,97,166,174],"HPC":[6],"codes.":[7],"Typical":[8],"GPU":[9,21,56],"implementations":[10,184],"have":[11],"a":[12,132,156,179],"loop":[13,66],"on":[14],"the":[15,20,37,42,64,82,91,98,116,121],"host":[16],"side":[17],"that":[18],"invokes":[19],"kernel":[22,34],"as":[23,25],"much":[24],"time/algorithm":[26],"steps":[27],"there":[28],"are.":[29],"The":[30],"termination":[31],"of":[32,90,115,124,129,135,142,164],"each":[33,94],"implicitly":[35],"acts":[36],"barrier":[38],"required":[39],"after":[40],"advancing":[41],"solution":[43],"every":[44],"time":[45,65,95],"step.":[46],"We":[47,79,119],"propose":[48],"an":[49],"execution":[50],"model":[51],"for":[52,77,131,144,149],"running":[53],"iterative":[55,110,136],"kernels:":[57],"PERsistent":[58],"KernelS":[59],"(PERKS).":[60],"In":[61],"this":[62],"model,":[63],"is":[67],"moved":[68],"inside":[69],"persistent":[70],"kernel,":[71],"and":[72,101,126,147,155,172],"device-wide":[73],"barriers":[74],"are":[75],"used":[76],"synchronization.":[78],"then":[80],"reduce":[81],"traffic":[83],"to":[84,108],"device":[85],"memory":[86],"by":[87],"caching":[88],"subset":[89],"output":[92],"step":[96],"unused":[99],"registers":[100],"shared":[102],"memory.":[103],"PERKS":[104,125,130],"can":[105],"be":[106],"generalized":[107],"any":[109],"solver:":[111],"they":[112],"largely":[113],"independent":[114],"solver's":[117],"implementation.":[118],"explain":[120],"design":[122],"principle":[123],"demonstrate":[127],"effectiveness":[128],"wide":[133],"range":[134],"2D/3D":[137],"stencil":[138],"benchmarks":[139],"(geomean":[140,162],"speedup":[141,163],"2.12x":[143],"2D":[145],"stencils":[146,151],"1.24x":[148],"3D":[150],"over":[152,178],"state-of-art":[153,180],"libraries),":[154],"Krylov":[157],"subspace":[158],"conjugate":[159],"gradient":[160],"solver":[161],"4.86x":[165],"smaller":[167],"SpMV":[168,176],"datasets":[169,177],"from":[170],"SuiteSparse":[171],"1.43x":[173],"larger":[175],"library).":[181],"All":[182],"PERKS-based":[183],"available":[185],"at:":[186],"https://github.com/neozhang307/PERKS.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2023-05-03T00:00:00"}
