{"id":"https://openalex.org/W4313021323","doi":"https://doi.org/10.1145/3560442.3560450","title":"Compact Update Algorithm for Numerical Schemes with Cross Stencil for Data Access Locality","display_name":"Compact Update Algorithm for Numerical Schemes with Cross Stencil for Data Access Locality","publication_year":2022,"publication_date":"2022-07-08","ids":{"openalex":"https://openalex.org/W4313021323","doi":"https://doi.org/10.1145/3560442.3560450"},"language":"en","primary_location":{"id":"doi:10.1145/3560442.3560450","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3560442.3560450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 6th High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083518099","display_name":"Andrey Zakirov","orcid":"https://orcid.org/0000-0001-7346-6635"},"institutions":[{"id":"https://openalex.org/I4210098050","display_name":"Kintech Lab (Russia)","ror":"https://ror.org/00xrmm659","country_code":"RU","type":"company","lineage":["https://openalex.org/I4210098050"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Andrey V. Zakirov","raw_affiliation_strings":["Kintech Lab Ltd., Russian Federation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kintech Lab Ltd., Russian Federation","institution_ids":["https://openalex.org/I4210098050"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049751406","display_name":"Boris Korneev","orcid":"https://orcid.org/0000-0003-1010-6629"},"institutions":[{"id":"https://openalex.org/I86427540","display_name":"Keldysh Institute of Applied Mathematics","ror":"https://ror.org/01dv3hq14","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210124601","https://openalex.org/I86427540"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Boris A. Korneev","raw_affiliation_strings":["Keldysh Institute of Applied Mathematics RAS, Russian Federation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Keldysh Institute of Applied Mathematics RAS, Russian Federation","institution_ids":["https://openalex.org/I86427540"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028324669","display_name":"Anastasia Perepelkina","orcid":"https://orcid.org/0000-0003-2517-6064"},"institutions":[{"id":"https://openalex.org/I86427540","display_name":"Keldysh Institute of Applied Mathematics","ror":"https://ror.org/01dv3hq14","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210124601","https://openalex.org/I86427540"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Anastasia Yu. Perepelkina","raw_affiliation_strings":["Keldysh Institute of Applied Mathematics RAS, Russian Federation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Keldysh Institute of Applied Mathematics RAS, Russian Federation","institution_ids":["https://openalex.org/I86427540"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2852,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.59854304,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"51","last_page":"58"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10339","display_name":"Advanced Numerical Methods in Computational Mathematics","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8109695911407471},{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.7530578970909119},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6651246547698975},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.592842698097229},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.46636852622032166},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.45915690064430237},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4534268081188202},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.43423449993133545},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.42454689741134644},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4016174376010895},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3675130009651184},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.09668180346488953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8109695911407471},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.7530578970909119},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6651246547698975},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.592842698097229},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.46636852622032166},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.45915690064430237},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4534268081188202},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.43423449993133545},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.42454689741134644},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4016174376010895},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3675130009651184},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.09668180346488953},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3560442.3560450","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3560442.3560450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 6th High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1201276785","https://openalex.org/W1583892864","https://openalex.org/W1970141743","https://openalex.org/W1994935630","https://openalex.org/W2002555321","https://openalex.org/W2023528167","https://openalex.org/W2033876836","https://openalex.org/W2038481157","https://openalex.org/W2234212075","https://openalex.org/W2296218291","https://openalex.org/W2804393274","https://openalex.org/W2997667741","https://openalex.org/W3003944395","https://openalex.org/W3024733540","https://openalex.org/W3112882530","https://openalex.org/W3180049492","https://openalex.org/W4205236375","https://openalex.org/W4206222013","https://openalex.org/W4252060206"],"related_works":["https://openalex.org/W3105129168","https://openalex.org/W2804920739","https://openalex.org/W4316371992","https://openalex.org/W2186216222","https://openalex.org/W2392765154","https://openalex.org/W2564509292","https://openalex.org/W2008005532","https://openalex.org/W4243631099","https://openalex.org/W4285014488","https://openalex.org/W2066011280"],"abstract_inverted_index":{"Accurate":[0],"fluid":[1,10],"simulations":[2],"require":[3],"high":[4],"computing":[5,179],"cost.":[6],"3D":[7],"modelling":[8],"of":[9,21,37,84,96,138],"dynamic":[11],"field":[12],"evolution":[13],"on":[14,64,131,144],"a":[15,114],"discrete":[16],"mesh":[17],"takes":[18],"large":[19],"amount":[20],"data":[22,25,98,102,153,163,173],"storage,":[23],"and":[24,134,181],"access":[26],"becomes":[27],"performance":[28,128,137],"bottleneck.":[29],"Our":[30],"work":[31],"is":[32,78,108,129,142,155,165],"concerned":[33],"with":[34,110,170],"the":[35,39,49,71,118,135,151,161,172,178,185],"task":[36],"mitigating":[38],"limitations":[40],"that":[41,150],"are":[42,175],"caused":[43],"by":[44,167,177],"finite":[45],"memory":[46],"throughput":[47],"in":[48,156],"parallel":[50],"simulations.":[51],"We":[52],"use":[53],"LRnLA":[54,76],"algorithms":[55],"for":[56,74,91,113,117],"this":[57,69],"issue,":[58],"where":[59],"localized":[60],"tasks":[61],"combine":[62],"updates":[63,125],"several":[65],"time":[66],"layers.":[67],"In":[68],"paper,":[70],"compact":[72,106],"update":[73,107],"DiamondTorre":[75,85],"algorithm":[77],"constructed.":[79],"It":[80],"further":[81],"improves":[82,88],"localization":[83],"algorithm,":[86],"which":[87],"arithmetic":[89],"intensity":[90],"cross-stencil":[92],"schemes.":[93],"The":[94,105],"ratio":[95],"loaded":[97],"to":[99],"fully":[100],"updated":[101],"approaches":[103],"1.":[104],"implemented":[109],"CUDA":[111],"C++":[112],"numerical":[115],"scheme":[116],"advection-diffusion":[119],"equation.":[120],"50":[121],"GLU/sec":[122,141],"(billion":[123],"lattice":[124],"per":[126],"second)":[127],"obtained":[130,143],"Nvidia":[132],"RTX3090,":[133],"maximal":[136],"almost":[139],"300":[140],"an":[145],"8":[146],"GPU":[147],"workstation.":[148],"Note":[149],"main":[152],"storage":[154],"CPU":[157],"RAM":[158],"memory,":[159],"but":[160],"host-device":[162],"exchange":[164],"concealed":[166,176],"temporal":[168],"blocking:":[169],"appropriate":[171],"transfers":[174],"operations":[180],"do":[182],"not":[183],"affect":[184],"performance.":[186]},"counts_by_year":[{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
