{"id":"https://openalex.org/W4401454936","doi":"https://doi.org/10.1145/3677333.3678270","title":"Towards Performance Portable Kernels for Computational Fluid Dynamics Using DaCe","display_name":"Towards Performance Portable Kernels for Computational Fluid Dynamics Using DaCe","publication_year":2024,"publication_date":"2024-08-09","ids":{"openalex":"https://openalex.org/W4401454936","doi":"https://doi.org/10.1145/3677333.3678270"},"language":"en","primary_location":{"id":"doi:10.1145/3677333.3678270","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678270","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3677333.3678270","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055982549","display_name":"M\u00e5ns I. Andersson","orcid":"https://orcid.org/0000-0002-6384-2630"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"M\u00e5ns Ivar Andersson","raw_affiliation_strings":["KTH Royal Institute of Technology, Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033686530","display_name":"Martin Karp","orcid":"https://orcid.org/0000-0003-3374-8093"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Martin Karp","raw_affiliation_strings":["KTH Royal Institute of Technology, Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085178088","display_name":"Stefano Markidis","orcid":"https://orcid.org/0000-0003-0639-0639"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Stefano Markidis","raw_affiliation_strings":["KTH Royal Institute of Technology, Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5055982549"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":0.5186,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61562021,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"110","last_page":"111"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6729066371917725},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.4864480197429657},{"id":"https://openalex.org/keywords/computational-fluid-dynamics","display_name":"Computational fluid dynamics","score":0.4846232831478119},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3650025725364685},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.34745579957962036},{"id":"https://openalex.org/keywords/aerospace-engineering","display_name":"Aerospace engineering","score":0.130791574716568},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.10819888114929199},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10626095533370972},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09645107388496399}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6729066371917725},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.4864480197429657},{"id":"https://openalex.org/C1633027","wikidata":"https://www.wikidata.org/wiki/Q815820","display_name":"Computational fluid dynamics","level":2,"score":0.4846232831478119},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3650025725364685},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.34745579957962036},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.130791574716568},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.10819888114929199},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10626095533370972},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09645107388496399}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3677333.3678270","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678270","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3677333.3678270","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3677333.3678270","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 53rd International Conference on Parallel Processing Workshops","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2987684178","https://openalex.org/W3155036733","https://openalex.org/W4319941915"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2902248750","https://openalex.org/W2583171488","https://openalex.org/W2006186928","https://openalex.org/W2275426735","https://openalex.org/W2390279801","https://openalex.org/W2189586792","https://openalex.org/W2016193480","https://openalex.org/W2082695312"],"abstract_inverted_index":{"With":[0],"the":[1,15,36,40,53,76,93,106,129,138,156,180,190,200,208,212],"rise":[2],"of":[3,42,55,78,96,111,158,184,202,210,215,227],"new":[4,43,101],"high-performance":[5,79,117,175],"computing":[6],"(HPC)":[7],"accelerators,":[8,91],"such":[9],"as":[10],"Nvidia":[11,196],"and":[12,89,178],"AMD":[13],"GPUs,":[14],"demand":[16],"for":[17,28,86,99,120,182,223],"efficient":[18],"code":[19,44,85,98,108,176,188,204],"targeting":[20],"diverse":[21],"hardware":[22,33],"accelerators":[23],"poses":[24],"a":[25,69,100,115],"critical":[26,116,224],"challenge":[27],"HPC":[29,37],"application":[30,58],"developers.":[31],"This":[32,142],"diversity":[34],"in":[35,50],"systems":[38],"necessitates":[39],"development":[41],"tailored":[45],"to":[46,74,114,150],"specific":[47],"architectures,":[48],"which,":[49],"turn,":[51],"hampers":[52],"sustainability":[54,214],"large":[56],"scientific":[57,217],"development.":[59],"In":[60],"this":[61,159,172],"work,":[62],"we":[63,126,206],"rely":[64],"on":[65,128,137,145,195],"DaCe":[66,81],"[1,":[67],"2],":[68],"data-centric":[70],"parallel":[71],"programming":[72],"framework,":[73],"automate":[75],"generation":[77,109,177],"kernels.":[80],"can":[82],"generate":[83],"automatic":[84,107,203],"multicore":[87],"processors":[88],"various":[90],"alleviating":[92],"programmer":[94],"burden":[95],"rewriting":[97],"architecture.":[102],"Our":[103],"work":[104,194],"demonstrates":[105],"capabilities":[110],"DaCe,":[112],"applied":[113],"computational":[118,160,225],"kernel":[119,161],"Computational":[121],"Fluid":[122],"Dynamics":[123],"code.":[124],"Specifically,":[125],"focus":[127],"Fortran-based":[130],"solver,":[131],"Neko":[132,191],"[4]":[133],"which":[134],"is":[135],"based":[136],"Spectral":[139],"Element":[140],"Method.":[141],"method":[143],"relies":[144],"small-sized":[146],"matrix":[147],"multiplications":[148],"akin":[149],"BLAS":[151],"dgemm":[152],"operations.":[153],"We":[154,169],"describe":[155],"formulation":[157],"through":[162],"DaCe\u2019s":[163,185],"Stateful":[164],"Dataflow":[165],"Multigraph":[166],"(SDFG)":[167],"representation.":[168],"discuss":[170],"how":[171],"representation":[173],"facilitates":[174],"detail":[179],"workflow":[181],"integration":[183],"automatically":[186],"generated":[187],"into":[189],"solver.":[192],"Initial":[193],"GH200.":[197],"By":[198],"showcasing":[199],"potential":[201],"generation,":[205],"highlight":[207],"feasibility":[209],"supporting":[211],"long-term":[213],"large-scale":[216,228],"applications":[218],"by":[219],"using":[220],"portable":[221],"solutions":[222],"kernels":[226],"codes.":[229]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
