{"id":"https://openalex.org/W7123364299","doi":"https://doi.org/10.1109/candar68384.2025.00031","title":"GPU Acceleration of RI-RMP2 Correlation Energy Computation","display_name":"GPU Acceleration of RI-RMP2 Correlation Energy Computation","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7123364299","doi":"https://doi.org/10.1109/candar68384.2025.00031"},"language":null,"primary_location":{"id":"doi:10.1109/candar68384.2025.00031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/candar68384.2025.00031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Thirteenth International Symposium on Computing and Networking (CANDAR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111269964","display_name":"Kanta Suzuki","orcid":null},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kanta Suzuki","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009547258","display_name":"Yasuaki Ito","orcid":"https://orcid.org/0000-0003-0593-231X"},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasuaki Ito","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108960905","display_name":"Nobuya Yokogawa","orcid":null},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nobuya Yokogawa","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107945532","display_name":"Satoki Tsuji","orcid":null},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoki Tsuji","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102668696","display_name":"Koji Nakano","orcid":null},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koji Nakano","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046692303","display_name":"Victor Parque","orcid":"https://orcid.org/0000-0001-7329-1468"},"institutions":[{"id":"https://openalex.org/I113306721","display_name":"Hiroshima University","ror":"https://ror.org/03t78wx29","country_code":"JP","type":"education","lineage":["https://openalex.org/I113306721"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Victor Parque","raw_affiliation_strings":["Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527"],"affiliations":[{"raw_affiliation_string":"Hiroshima University,Graduate School of Advanced Science and Engineering,Higashi-Hiroshima,Japan,739-8527","institution_ids":["https://openalex.org/I113306721"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089627100","display_name":"Akihiko Kasagi","orcid":"https://orcid.org/0000-0002-5793-335X"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Akihiko Kasagi","raw_affiliation_strings":["Fujitsu Limited,Computing Laboratory,Kawasaki,Japan,211-8588"],"affiliations":[{"raw_affiliation_string":"Fujitsu Limited,Computing Laboratory,Kawasaki,Japan,211-8588","institution_ids":["https://openalex.org/I2252096349"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5111269964"],"corresponding_institution_ids":["https://openalex.org/I113306721"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71626984,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"174","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.583899974822998,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.583899974822998,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.094200000166893,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.050999999046325684,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7771999835968018},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7034000158309937},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.6148999929428101},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5823000073432922},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5602999925613403},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.45840001106262207},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4442000091075897},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4374000132083893},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.3880000114440918},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.37279999256134033}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7771999835968018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7430999875068665},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7034000158309937},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6521000266075134},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.6148999929428101},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.614300012588501},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5823000073432922},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5602999925613403},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.45840001106262207},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4442000091075897},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4374000132083893},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41589999198913574},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.3880000114440918},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.37279999256134033},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C58053490","wikidata":"https://www.wikidata.org/wiki/Q176555","display_name":"Quantum computer","level":3,"score":0.36000001430511475},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3529999852180481},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.34869998693466187},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.32010000944137573},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3089999854564667},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.2768000066280365},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C19637589","wikidata":"https://www.wikidata.org/wiki/Q1117940","display_name":"Coupled cluster","level":3,"score":0.26910001039505005},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C19754495","wikidata":"https://www.wikidata.org/wiki/Q3435924","display_name":"Computational problem","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/candar68384.2025.00031","is_oa":false,"landing_page_url":"https://doi.org/10.1109/candar68384.2025.00031","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Thirteenth International Symposium on Computing and Networking (CANDAR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,22,60],"Computational":[1],"Quantum":[2],"Chemistry,":[3],"the":[4,24,35,71,84,97,112,144,191],"restricted":[5],"second-order":[6],"M0ller-Plesset":[7],"theory":[8],"(RMP2)":[9],"has":[10],"garnered":[11],"significant":[12],"attention":[13],"for":[14,130],"performing":[15],"high-accuracy":[16],"computations":[17],"on":[18,157],"closed-shell":[19],"molecular":[20],"systems.":[21],"particular,":[23],"RMP2":[25],"with":[26,40,121,165],"Resolution-of-the-Identity":[27],"approximation":[28],"(RI-RMP2)":[29],"reduces":[30,143],"memory":[31,99],"usage":[32,100],"by":[33,137],"approximating":[34],"O(M<sup":[36,42,85,102],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[37,43,86,103],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">4</sup>)":[38,87],"tensor":[39,88],"an":[41,108,158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">3</sup>)":[44],"intermediate":[45],"tensor.":[46],"However,":[47],"this":[48,61],"approach":[49],"still":[50],"requires":[51],"a":[52,65,90,117,176,180],"high":[53],"computational":[54],"cost,":[55],"making":[56],"further":[57],"acceleration":[58],"necessary.":[59],"paper,":[62],"we":[63,106],"propose":[64],"GPU-based":[66],"implementation":[67,80,172],"to":[68,101,110,175,185],"efficiently":[69],"compute":[70,111,135],"RI-RMP2":[72,113],"correlation":[73,114],"energy.":[74],"The":[75],"key":[76],"idea":[77],"of":[78,147,194],"our":[79,171],"lies":[81],"in":[82],"reconstructing":[83],"via":[89,116],"vendor-optimized":[91],"matrix":[92],"multiplication":[93],"routine,":[94],"while":[95],"limiting":[96],"required":[98,145],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">3</sup>).":[104],"Furthermore,":[105],"formulate":[107],"equation":[109],"energy":[115],"four-term":[118],"decomposition":[119],"(summation)":[120],"mutually":[122],"exclusive":[123],"index":[124],"ranges,":[125],"create":[126],"separate":[127],"CUDA":[128,139],"kernels":[129],"each":[131],"decomposed":[132],"term,":[133],"and":[134,149,183,187,201],"them":[136],"different":[138,166],"streams.":[140],"Our":[141],"method":[142],"number":[146],"threads":[148],"maximizes":[150],"GPU":[151,161],"resource":[152],"utilization.":[153],"Experimental":[154],"evaluations":[155],"conducted":[156],"NVIDIA":[159],"A100":[160],"using":[162],"relevant":[163],"molecules":[164],"basis":[167],"sets":[168],"demonstrate":[169],"that":[170],"achieves":[173],"up":[174,184],"23.18\u00d7":[177],"speedup":[178],"over":[179,190],"naive":[181],"implementation,":[182],"165.16\u00d7":[186],"4.04\u00d7":[188],"speedups":[189],"state-of-the-art":[192],"versions":[193],"existing":[195],"quantum":[196],"chemistry":[197],"software":[198],"packages,":[199],"PySCF":[200],"GPU4PySCF,":[202],"respectively.":[203]},"counts_by_year":[],"updated_date":"2026-01-14T00:46:21.520733","created_date":"2026-01-14T00:00:00"}
