{"id":"https://openalex.org/W4416513907","doi":"https://doi.org/10.1109/isncc66965.2025.11250479","title":"A Study on the Improvement of Athena++ Solvers for HPC-AI Applications","display_name":"A Study on the Improvement of Athena++ Solvers for HPC-AI Applications","publication_year":2025,"publication_date":"2025-10-27","ids":{"openalex":"https://openalex.org/W4416513907","doi":"https://doi.org/10.1109/isncc66965.2025.11250479"},"language":null,"primary_location":{"id":"doi:10.1109/isncc66965.2025.11250479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isncc66965.2025.11250479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Symposium on Networks, Computers and Communications (ISNCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006558854","display_name":"Cheol-Joo Chae","orcid":"https://orcid.org/0009-0001-7555-4070"},"institutions":[{"id":"https://openalex.org/I4210118585","display_name":"National Fisheries Research and Development Institute","ror":"https://ror.org/02chzeh21","country_code":"KR","type":"funder","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210090853","https://openalex.org/I4210118585"]},{"id":"https://openalex.org/I4210146532","display_name":"National Fisheries University","ror":"https://ror.org/04kkb3773","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210146532"]}],"countries":["JP","KR"],"is_corresponding":true,"raw_author_name":"Cheol-Joo Chae","raw_affiliation_strings":["Korea National University of Agriculture and Fisheries,dept. General Education,Jeonju,Korea"],"affiliations":[{"raw_affiliation_string":"Korea National University of Agriculture and Fisheries,dept. General Education,Jeonju,Korea","institution_ids":["https://openalex.org/I4210118585","https://openalex.org/I4210146532"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052992006","display_name":"Hyunjo Lee","orcid":"https://orcid.org/0000-0002-1316-6822"},"institutions":[{"id":"https://openalex.org/I4210118585","display_name":"National Fisheries Research and Development Institute","ror":"https://ror.org/02chzeh21","country_code":"KR","type":"funder","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210090853","https://openalex.org/I4210118585"]},{"id":"https://openalex.org/I4210146532","display_name":"National Fisheries University","ror":"https://ror.org/04kkb3773","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210146532"]}],"countries":["JP","KR"],"is_corresponding":false,"raw_author_name":"Hyunjo Lee","raw_affiliation_strings":["Korea National University of Agriculture and Fisheries,dept. General Education,Jeonju,Korea"],"affiliations":[{"raw_affiliation_string":"Korea National University of Agriculture and Fisheries,dept. General Education,Jeonju,Korea","institution_ids":["https://openalex.org/I4210118585","https://openalex.org/I4210146532"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044381129","display_name":"Hyun Jung","orcid":"https://orcid.org/0000-0001-7467-8189"},"institutions":[{"id":"https://openalex.org/I878022262","display_name":"Korea Institute of Science & Technology Information","ror":"https://ror.org/01k4yrm29","country_code":"KR","type":"facility","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098","https://openalex.org/I878022262"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyun Mi Jung","raw_affiliation_strings":["Korea Institute of Science and Technology Information,center for supercomputing technology development,Daejeon,Korea"],"affiliations":[{"raw_affiliation_string":"Korea Institute of Science and Technology Information,center for supercomputing technology development,Daejeon,Korea","institution_ids":["https://openalex.org/I878022262"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006558854"],"corresponding_institution_ids":["https://openalex.org/I4210118585","https://openalex.org/I4210146532"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38799339,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8894000053405762,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8894000053405762,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.021199999377131462,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13200","display_name":"Spacecraft and Cryogenic Technologies","score":0.00800000037997961,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.6682999730110168},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6535000205039978},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.553600013256073},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.47279998660087585},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4560000002384186},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.38929998874664307},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.3831000030040741},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.37630000710487366},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.36329999566078186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8215000033378601},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7264999747276306},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.6682999730110168},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6535000205039978},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.553600013256073},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5116000175476074},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.47279998660087585},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4560000002384186},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.3831000030040741},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.37630000710487366},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.31139999628067017},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.29760000109672546},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2896000146865845},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isncc66965.2025.11250479","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isncc66965.2025.11250479","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Symposium on Networks, Computers and Communications (ISNCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322105","display_name":"Korea Institute of Science and Technology Information","ror":"https://ror.org/01k4yrm29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1969760987","https://openalex.org/W2059049415","https://openalex.org/W4205236375"],"related_works":[],"abstract_inverted_index":{"This":[0,175],"study":[1],"introduces":[2],"a":[3,32,120,178],"GPU-accelerated":[4],"optimization":[5],"of":[6,16,83,105,135,195],"the":[7,14,53,94,112,160,192],"Athena++":[8,30],"simulation":[9],"framework,":[10],"focusing":[11],"on":[12],"enhancing":[13],"performance":[15,59,140,168],"HLLC":[17],"and":[18,25,45,61,86,103,167,180],"HLLD":[19,95],"solvers":[20,40],"for":[21,171,183],"High-Performance":[22],"Computing":[23],"(HPC)":[24],"Artificial":[26],"Intelligence":[27],"(AI)":[28],"applications.":[29],"is":[31],"well-established":[33],"astrophysical":[34],"code":[35],"that":[36],"employs":[37],"advanced":[38],"Riemann":[39],"to":[41,57,65,77,152],"simulate":[42],"fluid":[43],"dynamics":[44],"magnetohydrodynamics":[46],"(MHD).":[47],"In":[48,92],"our":[49],"work,":[50],"we":[51],"analyze":[52],"solver":[54,96],"execution":[55,102],"flow":[56],"identify":[58],"bottlenecks":[60],"redesign":[62],"critical":[63],"components":[64],"leverage":[66],"GPU":[67,185],"parallelism":[68],"effectively.":[69],"Key":[70],"strategies":[71],"include":[72],"selective":[73],"input":[74],"array":[75],"transfer":[76],"minimize":[78],"memory":[79],"overhead,":[80],"CPU-side":[81],"preprocessing":[82],"external":[84],"functions,":[85],"loop-level":[87],"parallelization":[88],"within":[89],"CUDA":[90],"kernels.":[91],"particular,":[93],"was":[97,149],"optimized":[98],"through":[99],"asynchronous":[100],"kernel":[101,157],"decoupling":[104],"CPU-exclusive":[106],"logic.":[107],"Experimental":[108],"results":[109],"conducted":[110],"in":[111,125,142,198],"Google":[113],"Colab":[114],"environment":[115],"using":[116],"NVIDIA":[117],"GPUs":[118],"showed":[119],"$\\sim":[121],"25":[122],"\\times$":[123],"improvement":[124],"average":[126],"time":[127],"per":[128],"cycle,":[129],"with":[130],"an":[131],"overall":[132],"runtime":[133],"reduction":[134],"approximately":[136],"$20":[137],"\\%$.":[138],"Some":[139],"degradation":[141],"total":[143],"cycle":[144],"count":[145],"($\\sim":[146],"12.6":[147],"\\%$)":[148],"observed":[150],"due":[151],"numerical":[153],"variations":[154],"introduced":[155],"during":[156],"migration.":[158],"Nonetheless,":[159],"proposed":[161],"GPU-enabled":[162],"approach":[163],"demonstrates":[164],"notable":[165],"scalability":[166],"benefits,":[169],"particularly":[170],"large-scale":[172],"MHD":[173],"problems.":[174],"work":[176],"offers":[177],"practical":[179],"reusable":[181],"framework":[182],"integrating":[184],"acceleration":[186],"into":[187],"existing":[188],"solver-based":[189],"codes,":[190],"supporting":[191],"broader":[193],"goals":[194],"heterogeneous":[196],"computing":[197],"HPC-AI":[199],"systems.":[200]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-23T00:00:00"}
