{"id":"https://openalex.org/W4406233735","doi":"https://doi.org/10.1177/10943420241313385","title":"Efficiency and scalability of fully-resolved fluid-particle simulations on heterogeneous CPU-GPU architectures","display_name":"Efficiency and scalability of fully-resolved fluid-particle simulations on heterogeneous CPU-GPU architectures","publication_year":2025,"publication_date":"2025-01-10","ids":{"openalex":"https://openalex.org/W4406233735","doi":"https://doi.org/10.1177/10943420241313385"},"language":"en","primary_location":{"id":"doi:10.1177/10943420241313385","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420241313385","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1177/10943420241313385","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068595224","display_name":"Samuel Kemmler","orcid":"https://orcid.org/0000-0002-9631-7349"},"institutions":[{"id":"https://openalex.org/I1330165540","display_name":"Federal Institute For Materials Research and Testing","ror":"https://ror.org/03x516a66","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1330165540"]},{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Samuel Kemmler","raw_affiliation_strings":["Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM)","Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","Division 7.2 for Buildings and Structures, Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM), Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM)","institution_ids":["https://openalex.org/I1330165540"]},{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Division 7.2 for Buildings and Structures, Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM), Berlin, Germany","institution_ids":["https://openalex.org/I1330165540"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002494157","display_name":"Christoph Rettinger","orcid":"https://orcid.org/0000-0002-0605-3731"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christoph Rettinger","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047821416","display_name":"Ulrich Ruede","orcid":"https://orcid.org/0000-0001-8796-8599"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]},{"id":"https://openalex.org/I4210106946","display_name":"Centre Europ\u00e9en de Recherche et de Formation Avanc\u00e9e en Calcul Scientifique","ror":"https://ror.org/02dzbc556","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4210106946"]}],"countries":["DE","FR"],"is_corresponding":false,"raw_author_name":"Ulrich R\u00fcde","raw_affiliation_strings":["CERFACS","Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","CERFACS, Toulouse, France"],"affiliations":[{"raw_affiliation_string":"CERFACS","institution_ids":["https://openalex.org/I4210106946"]},{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"CERFACS, Toulouse, France","institution_ids":["https://openalex.org/I4210106946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039838763","display_name":"Pablo Cu\u00e9llar","orcid":"https://orcid.org/0000-0003-2446-8065"},"institutions":[{"id":"https://openalex.org/I1330165540","display_name":"Federal Institute For Materials Research and Testing","ror":"https://ror.org/03x516a66","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1330165540"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Pablo Cu\u00e9llar","raw_affiliation_strings":["Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM)","Division 7.2 for Buildings and Structures, Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM), Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM)","institution_ids":["https://openalex.org/I1330165540"]},{"raw_affiliation_string":"Division 7.2 for Buildings and Structures, Bundesanstalt f\u00fcr Materialforschung und -pr\u00fcfung (BAM), Berlin, Germany","institution_ids":["https://openalex.org/I1330165540"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043208394","display_name":"Harald Koestler","orcid":"https://orcid.org/0000-0002-6992-2690"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Harald K\u00f6stler","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Chair for System Simulation, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068595224"],"corresponding_institution_ids":["https://openalex.org/I1330165540","https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":11.4187,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.98916155,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"39","issue":"3","first_page":"345","last_page":"363"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11751","display_name":"Lattice Boltzmann Simulation Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11751","display_name":"Lattice Boltzmann Simulation Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11382","display_name":"Fluid Dynamics and Heat Transfer","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11694","display_name":"Fluid Dynamics Simulations and Interactions","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7441328167915344},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6791248321533203},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6643801927566528},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5023331642150879},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.47240298986434937},{"id":"https://openalex.org/keywords/particle","display_name":"Particle (ecology)","score":0.41832372546195984},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12651091814041138},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.061459898948669434}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7441328167915344},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6791248321533203},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6643801927566528},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5023331642150879},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.47240298986434937},{"id":"https://openalex.org/C2778517922","wikidata":"https://www.wikidata.org/wiki/Q7140482","display_name":"Particle (ecology)","level":2,"score":0.41832372546195984},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12651091814041138},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.061459898948669434},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1177/10943420241313385","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420241313385","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:open.fau.de:openfau/35973","is_oa":true,"landing_page_url":"https://open.fau.de/handle/openfau/35973","pdf_url":"https://open.fau.de/bitstreams/e16022fb-9ee6-496a-9979-9d702e56738e/download","source":{"id":"https://openalex.org/S7407055110","display_name":"OPUS FAU - Online publication system of Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1177/10943420241313385","is_oa":true,"landing_page_url":"https://doi.org/10.1177/10943420241313385","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2689834324","display_name":null,"funder_award_id":"101093393","funder_id":"https://openalex.org/F1100377486","funder_display_name":"European High Performance Computing Joint Undertaking"},{"id":"https://openalex.org/G6052429835","display_name":null,"funder_award_id":"(DFG)","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G7389665383","display_name":null,"funder_award_id":"433735254","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F1100377486","display_name":"European High Performance Computing Joint Undertaking","ror":null},{"id":"https://openalex.org/F4320320873","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57"},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320331625","display_name":"Gauss Centre for Supercomputing","ror":"https://ror.org/0585fsj26"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1522241184","https://openalex.org/W1977708713","https://openalex.org/W1992773065","https://openalex.org/W2010936531","https://openalex.org/W2029838116","https://openalex.org/W2071421256","https://openalex.org/W2071530432","https://openalex.org/W2073775990","https://openalex.org/W2074129693","https://openalex.org/W2076525342","https://openalex.org/W2087386744","https://openalex.org/W2088418115","https://openalex.org/W2106821446","https://openalex.org/W2108576088","https://openalex.org/W2166776469","https://openalex.org/W2211236111","https://openalex.org/W2263037260","https://openalex.org/W2411402281","https://openalex.org/W2513045872","https://openalex.org/W2536891287","https://openalex.org/W2590887999","https://openalex.org/W2592678318","https://openalex.org/W2608672451","https://openalex.org/W2613785092","https://openalex.org/W2758320287","https://openalex.org/W2765368849","https://openalex.org/W2791192255","https://openalex.org/W2801005379","https://openalex.org/W2970375571","https://openalex.org/W2985187207","https://openalex.org/W2999047083","https://openalex.org/W3000848515","https://openalex.org/W3010528554","https://openalex.org/W3015570157","https://openalex.org/W3102111730","https://openalex.org/W3107080689","https://openalex.org/W3112327526","https://openalex.org/W3133727106","https://openalex.org/W3160454883","https://openalex.org/W3166701786","https://openalex.org/W3183375027","https://openalex.org/W4239647010","https://openalex.org/W4288035259","https://openalex.org/W4311145589","https://openalex.org/W4321482611","https://openalex.org/W4366826512","https://openalex.org/W4385576899","https://openalex.org/W4398781219","https://openalex.org/W4401419033"],"related_works":["https://openalex.org/W2005148983","https://openalex.org/W2012954338","https://openalex.org/W2096672917","https://openalex.org/W2392023973","https://openalex.org/W4401278057","https://openalex.org/W3189307731","https://openalex.org/W1428699136","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W1998560227"],"abstract_inverted_index":{"Current":[0],"supercomputers":[1],"often":[2],"have":[3],"a":[4,49,67,73,79,119,144,152,218,265,276],"heterogeneous":[5,63,286],"architecture":[6],"using":[7],"both":[8],"conventional":[9,101],"Central":[10],"Processing":[11,16],"Units":[12,17],"(CPUs)":[13],"and":[14,109,130],"Graphics":[15],"(GPUs).":[18],"At":[19],"the":[20,62,88,93,97,126,134,148,166,169,175,196,203,206,223,233,243,246,255,282],"same":[21],"time,":[22],"numerical":[23],"simulation":[24,54,77,138,156,198],"tasks":[25],"frequently":[26],"involve":[27],"multiphysics":[28],"scenarios":[29],"whose":[30],"components":[31],"run":[32],"on":[33,100,143],"different":[34,53,59],"hardware":[35],"due":[36,164],"to":[37,48,58,124,131,165,191,232,245,251,261],"multiple":[38],"reasons,":[39],"e.g.,":[40],"architectural":[41],"requirements,":[42],"pragmatism,":[43],"etc.":[44],"This":[45,162],"leads":[46],"naturally":[47],"software":[50],"design":[51],"where":[52],"modules":[55],"are":[56,113,273],"mapped":[57],"subsystems":[60],"of":[61,78,87,168,177,189,205,285],"architecture.":[64],"We":[65],"present":[66],"detailed":[68],"performance":[69,129,142,201],"analysis":[70],"for":[71,96,151,241,281],"such":[72],"hybrid":[74,277],"four-way":[75],"coupled":[76],"fully":[80],"resolved":[81],"particle-laden":[82],"flow.":[83],"The":[84],"Eulerian":[85,136,154],"representation":[86],"flow":[89,155,197,247],"utilizes":[90],"GPUs,":[91,184],"while":[92],"Lagrangian":[94,208],"model":[95,105,121],"particles":[98,244],"runs":[99],"CPUs.":[102],"Two":[103],"characteristic":[104],"situations":[106],"involving":[107],"dense":[108,224],"dilute":[110],"particle":[111,209,225],"systems":[112],"used":[114],"as":[115],"benchmark":[116],"scenarios.":[117],"First,":[118],"roofline":[120],"is":[122,163,193,230,239,259],"employed":[123],"predict":[125],"node":[127],"level":[128],"show":[132],"that":[133,215,275],"lattice-Boltzmann-based":[135],"fluid":[137],"reaches":[139],"very":[140],"good":[141,200],"single":[145],"GPU.":[146],"Furthermore,":[147],"GPU-GPU":[149],"communication":[150,178,235,257],"large-scale":[153],"results":[157],"in":[158],"only":[159],"moderate":[160],"slowdowns.":[161],"efficiency":[167,188],"CUDA-aware":[170],"MPI":[171],"communication,":[172],"combined":[173],"with":[174],"use":[176,284],"hiding":[179],"techniques.":[180],"On":[181],"1024":[182],"A100":[183],"an":[185],"overall":[186],"parallel":[187],"up":[190],"71%":[192],"achieved.":[194],"While":[195],"has":[199],"characteristics,":[202],"integration":[204],"stiff":[207],"system":[210],"requires":[211],"frequent":[212],"CPU-CPU":[213],"communications":[214],"can":[216],"become":[217],"bottleneck,":[219],"especially":[220],"when":[221],"simulating":[222],"system.":[226],"Additionally,":[227],"special":[228],"attention":[229],"paid":[231],"CPU-GPU":[234,256],"overhead":[236,258],"since":[237],"this":[238,269],"essential":[240],"coupling":[242],"simulation.":[248],"However,":[249],"thanks":[250],"our":[252],"problem-aware":[253],"co-partitioning,":[254],"found":[260],"be":[262],"negligible.":[263],"As":[264],"lesson":[266],"learned":[267],"from":[268],"development,":[270],"four":[271],"criteria":[272],"postulated":[274],"implementation":[278],"must":[279],"meet":[280],"efficient":[283],"supercomputers.":[287]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
