{"id":"https://openalex.org/W2955858218","doi":"https://doi.org/10.1109/tpds.2019.2926084","title":"Optimizing Finite Volume Method Solvers on Nvidia GPUs","display_name":"Optimizing Finite Volume Method Solvers on Nvidia GPUs","publication_year":2019,"publication_date":"2019-07-07","ids":{"openalex":"https://openalex.org/W2955858218","doi":"https://doi.org/10.1109/tpds.2019.2926084","mag":"2955858218"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2019.2926084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2926084","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100659425","display_name":"Jingheng Xu","orcid":"https://orcid.org/0000-0001-7311-9924"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingheng Xu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115591592","display_name":"Guangwen Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangwen Yang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031545295","display_name":"Haohuan Fu","orcid":"https://orcid.org/0000-0002-6982-2235"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haohuan Fu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057940557","display_name":"Wayne Luk","orcid":"https://orcid.org/0000-0002-6750-927X"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wayne Luk","raw_affiliation_strings":["Imperial College, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Imperial College, London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038750675","display_name":"Lin Gan","orcid":"https://orcid.org/0000-0003-1297-4462"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Gan","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101603393","display_name":"Wen Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Shi","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091437474","display_name":"Wei Xue","orcid":"https://orcid.org/0000-0001-9740-6581"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xue","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005718995","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0001-7426-6248"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101716678","display_name":"Yong Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Jiang","raw_affiliation_strings":["Graduate School at Shenzhen, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101615091","display_name":"Conghui He","orcid":"https://orcid.org/0000-0001-8697-695X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Conghui He","raw_affiliation_strings":["Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100659425"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.9538,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.91194087,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":98},"biblio":{"volume":"30","issue":"12","first_page":"2790","last_page":"2805"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8752235174179077},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.808906078338623},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7946979999542236},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.7247115969657898},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5849413871765137},{"id":"https://openalex.org/keywords/finite-volume-method","display_name":"Finite volume method","score":0.515061616897583},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.49289751052856445},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.47088131308555603},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.42470696568489075},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.42401647567749023},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.10395681858062744},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08219367265701294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8752235174179077},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.808906078338623},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7946979999542236},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.7247115969657898},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5849413871765137},{"id":"https://openalex.org/C50478463","wikidata":"https://www.wikidata.org/wiki/Q1401936","display_name":"Finite volume method","level":2,"score":0.515061616897583},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.49289751052856445},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.47088131308555603},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.42470696568489075},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.42401647567749023},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.10395681858062744},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08219367265701294},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C57879066","wikidata":"https://www.wikidata.org/wiki/Q41217","display_name":"Mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2019.2926084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2019.2926084","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G2377984233","display_name":null,"funder_award_id":"51761135015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W204599449","https://openalex.org/W1537224157","https://openalex.org/W1753847782","https://openalex.org/W1838639847","https://openalex.org/W1967802285","https://openalex.org/W1970507985","https://openalex.org/W1976603717","https://openalex.org/W1978782819","https://openalex.org/W1995016628","https://openalex.org/W1998659319","https://openalex.org/W2002555321","https://openalex.org/W2002950519","https://openalex.org/W2021089857","https://openalex.org/W2023415862","https://openalex.org/W2037274813","https://openalex.org/W2037520749","https://openalex.org/W2039378765","https://openalex.org/W2048985617","https://openalex.org/W2056295840","https://openalex.org/W2063217111","https://openalex.org/W2067479799","https://openalex.org/W2072480616","https://openalex.org/W2076377833","https://openalex.org/W2076514761","https://openalex.org/W2098920641","https://openalex.org/W2113190809","https://openalex.org/W2127073620","https://openalex.org/W2133636438","https://openalex.org/W2148209604","https://openalex.org/W2153689314","https://openalex.org/W2155503253","https://openalex.org/W2292462258","https://openalex.org/W2490342239","https://openalex.org/W2505067803","https://openalex.org/W2562913534","https://openalex.org/W2583958932","https://openalex.org/W2593610709","https://openalex.org/W2612630614","https://openalex.org/W2618054938","https://openalex.org/W2743163708","https://openalex.org/W2751354592","https://openalex.org/W2999733275","https://openalex.org/W3036479320","https://openalex.org/W6608260506","https://openalex.org/W6637764043","https://openalex.org/W6697338583","https://openalex.org/W6734574453"],"related_works":["https://openalex.org/W2356602486","https://openalex.org/W2351992668","https://openalex.org/W2621501241","https://openalex.org/W2208757713","https://openalex.org/W3177128669","https://openalex.org/W2031026366","https://openalex.org/W2717174904","https://openalex.org/W2991848348","https://openalex.org/W2548545698","https://openalex.org/W2092936829"],"abstract_inverted_index":{"As":[0],"scientific":[1],"applications":[2,148],"are":[3],"increasingly":[4],"ported":[5],"to":[6,8,80,88,137,140,146],"GPUs":[7],"benefit":[9],"from":[10,76],"both":[11],"the":[12,38,42,54,67,77,81,91,101,131,142,159,181,186],"powerful":[13],"computing":[14],"capacity":[15],"and":[16,30,115,125],"high":[17],"throughput,":[18],"accelerating":[19],"explicit":[20,55],"solvers":[21],"for":[22,158],"GPU-based":[23],"finite":[24],"volume":[25],"methods":[26],"is":[27,156,174],"gaining":[28],"more":[29,31],"attention.":[32],"In":[33],"this":[34,191],"paper,":[35],"based":[36,97],"on":[37,107,118,149,163],"detailed":[39,128],"analysis":[40,129],"of":[41,49,94,103,121,130],"FVM":[43],"algorithm,":[44],"we":[45],"present":[46],"a":[47,73,127,175],"set":[48],"novel":[50],"optimization":[51],"methods,":[52],"including":[53],"data":[56],"cache":[57],"mechanism,":[58],"optimal":[59],"global":[60],"memory":[61],"loading":[62],"strategy,":[63],"as":[64,66,87,136],"well":[65],"inner-thread":[68],"rescheduling":[69],"method,":[70],"which":[71,173],"derives":[72],"suitable":[74],"mapping":[75],"solver":[78,162],"algorithm":[79],"underlying":[82],"GPU":[83,123,151],"hardware":[84],"architecture,":[85],"so":[86,135],"remarkably":[89],"improve":[90],"solving":[92],"performance":[93],"structured":[95],"mesh":[96],"FVM.":[98],"We":[99],"demonstrate":[100,138],"impact":[102],"our":[104],"tuning":[105,133,144,187],"techniques":[106,188],"two":[108],"widely-used":[109],"atmospheric":[110],"dynamic":[111],"kernels":[112],"(3-D":[113],"Euler":[114,161],"2-D":[116],"SWE)":[117],"five":[119],"kinds":[120],"mainstream":[122],"platforms,":[124],"make":[126],"different":[132,147],"methodologies":[134],"how":[139],"select":[141],"proper":[143],"strategy":[145],"various":[150],"platforms.":[152],"Specifically,":[153],"93.9x":[154],"speedup":[155,183],"achieved":[157],"3D":[160],"Nvidia":[164],"V100":[165],"over":[166],"one":[167],"12-core":[168],"Intel":[169],"E5-2697":[170],"(v2)":[171],"CPU,":[172],"77":[176],"percent":[177],"improvement":[178],"compared":[179],"with":[180],"original":[182],"without":[184],"adopting":[185],"presented":[189],"in":[190],"work.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
