{"id":"https://openalex.org/W4285100250","doi":"https://doi.org/10.1145/3539781.3539797","title":"Distributed-memory simulations of turbulent flows on modern GPU systems using an adaptive pencil decomposition library","display_name":"Distributed-memory simulations of turbulent flows on modern GPU systems using an adaptive pencil decomposition library","publication_year":2022,"publication_date":"2022-06-27","ids":{"openalex":"https://openalex.org/W4285100250","doi":"https://doi.org/10.1145/3539781.3539797"},"language":"en","primary_location":{"id":"doi:10.1145/3539781.3539797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539781.3539797","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539781.3539797","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Platform for Advanced Scientific Computing Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3539781.3539797","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030832425","display_name":"Joshua Romero","orcid":"https://orcid.org/0000-0003-1358-5565"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Joshua Romero","raw_affiliation_strings":["NVIDIA Corporation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077792054","display_name":"Pedro Costa","orcid":"https://orcid.org/0000-0001-7010-1040"},"institutions":[{"id":"https://openalex.org/I165368041","display_name":"University of Iceland","ror":"https://ror.org/01db6h964","country_code":"IS","type":"education","lineage":["https://openalex.org/I165368041"]}],"countries":["IS"],"is_corresponding":false,"raw_author_name":"Pedro Costa","raw_affiliation_strings":["University of Iceland, Reykjav\u00edk, Iceland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Iceland, Reykjav\u00edk, Iceland","institution_ids":["https://openalex.org/I165368041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079530787","display_name":"Massimiliano Fatica","orcid":"https://orcid.org/0000-0002-5839-1644"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Massimiliano Fatica","raw_affiliation_strings":["NVIDIA Corporation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030832425"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":1.9917,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.84381733,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10360","display_name":"Fluid Dynamics and Turbulent Flows","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10360","display_name":"Fluid Dynamics and Turbulent Flows","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10466","display_name":"Meteorological Phenomena and Simulations","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1902","display_name":"Atmospheric Science"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11254","display_name":"Fluid Dynamics and Vibration Analysis","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7641131281852722},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.7056200504302979},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5992719531059265},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5680555105209351},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5522984862327576},{"id":"https://openalex.org/keywords/domain-decomposition-methods","display_name":"Domain decomposition methods","score":0.5021963119506836},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.4895342290401459},{"id":"https://openalex.org/keywords/computational-fluid-dynamics","display_name":"Computational fluid dynamics","score":0.4848538637161255},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.44406911730766296},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.2880290448665619},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2463180124759674},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08794420957565308},{"id":"https://openalex.org/keywords/finite-element-method","display_name":"Finite element method","score":0.08613666892051697}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7641131281852722},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.7056200504302979},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5992719531059265},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5680555105209351},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5522984862327576},{"id":"https://openalex.org/C198880260","wikidata":"https://www.wikidata.org/wiki/Q5289813","display_name":"Domain decomposition methods","level":3,"score":0.5021963119506836},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.4895342290401459},{"id":"https://openalex.org/C1633027","wikidata":"https://www.wikidata.org/wiki/Q815820","display_name":"Computational fluid dynamics","level":2,"score":0.4848538637161255},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.44406911730766296},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.2880290448665619},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2463180124759674},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08794420957565308},{"id":"https://openalex.org/C135628077","wikidata":"https://www.wikidata.org/wiki/Q220184","display_name":"Finite element method","level":2,"score":0.08613666892051697},{"id":"https://openalex.org/C57879066","wikidata":"https://www.wikidata.org/wiki/Q41217","display_name":"Mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3539781.3539797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539781.3539797","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539781.3539797","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Platform for Advanced Scientific Computing Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3539781.3539797","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3539781.3539797","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3539781.3539797","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Platform for Advanced Scientific Computing Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.5699999928474426,"display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G1286236842","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1677143136","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G2503023272","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3083819904","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3944918260","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4501827968","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4565140552","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5076365615","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G5296923526","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G5614806141","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G6348972864","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7672463193","display_name":null,"funder_award_id":"No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G805243471","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320317220","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4285100250.pdf","grobid_xml":"https://content.openalex.org/works/W4285100250.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1970351891","https://openalex.org/W2053513044","https://openalex.org/W2067696760","https://openalex.org/W2272797324","https://openalex.org/W2273364576","https://openalex.org/W2499602676","https://openalex.org/W2563901451","https://openalex.org/W2564800775","https://openalex.org/W2611125018","https://openalex.org/W2794125265","https://openalex.org/W2802768264","https://openalex.org/W3036255981"],"related_works":["https://openalex.org/W2384867379","https://openalex.org/W2329539859","https://openalex.org/W3191490922","https://openalex.org/W2227905990","https://openalex.org/W2765823764","https://openalex.org/W3214280620","https://openalex.org/W2066041241","https://openalex.org/W1582436825","https://openalex.org/W2022552329","https://openalex.org/W3048263112"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,49,57,76],"performance":[4,27,97,124],"analysis":[5],"of":[6,34,45,125,156],"pencil":[7],"domain":[8,86,160],"decomposition":[9,87,115,161,169],"methodologies":[10],"for":[11,18,30,53,60,117],"three-dimensional":[12],"Computational":[13],"Fluid":[14],"Dynamics":[15],"(CFD)":[16],"codes":[17,40,74],"turbulence":[19],"simulations,":[20],"on":[21,91,130],"several":[22,126],"large":[23],"GPU-accelerated":[24],"clusters.":[25],"The":[26],"was":[28],"assessed":[29,122],"the":[31,35,43,65,84,96,123,141,153,167],"numerical":[32],"solution":[33],"Navier-Stokes":[36],"equations":[37],"in":[38,69,173],"two":[39],"which":[41],"require":[42],"calculation":[44],"Fast-Fourier":[46],"Transforms":[47],"(FFT):":[48],"tri-periodic":[50],"pseudo-spectral":[51],"solver":[52,59],"isotropic":[54],"turbulence,":[55],"and":[56,88,105,147,159,165],"finite-difference":[58],"canonical":[61],"turbulent":[62],"flows,":[63],"where":[64],"FFTs":[66],"are":[67],"used":[68],"its":[70],"Poisson":[71],"solver.":[72],"Both":[73],"use":[75],"newly":[77],"developed":[78],"transpose":[79],"library":[80,170],"that":[81,152,166],"automatically":[82],"determines":[83],"optimal":[85,154],"communication":[89,127,157],"backend":[90,158],"each":[92],"system.":[93],"We":[94],"compared":[95],"across":[98],"systems":[99],"with":[100,178],"very":[101],"different":[102],"node":[103],"topologies":[104],"available":[106,129],"network":[107],"bandwidth,":[108],"to":[109],"show":[110,151],"how":[111],"these":[112,131],"characteristics":[113],"impact":[114],"selection":[116],"best":[118],"performance.":[119],"Additionally,":[120],"we":[121],"libraries":[128],"systems,":[132],"such":[133],"as":[134],"Open-MPI,":[135],"IBM":[136],"Spectrum":[137],"MPI,":[138,140],"Cray":[139],"NVIDIA":[142],"Collective":[143],"Communication":[144],"Library":[145],"(NCCL),":[146],"NVSHMEM.":[148],"Our":[149],"results":[150],"combination":[155],"is":[162,171],"highly":[163],"system-dependent,":[164],"adaptive":[168],"key":[172],"ensuring":[174],"efficient":[175],"resource":[176],"usage":[177],"minimal":[179],"user":[180],"effort.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-20T08:49:12.498775","created_date":"2025-10-10T00:00:00"}
