{"id":"https://openalex.org/W3092942657","doi":"https://doi.org/10.1177/1094342020964857","title":"A CUDA fast multipole method with highly efficient M2L far field evaluation","display_name":"A CUDA fast multipole method with highly efficient M2L far field evaluation","publication_year":2020,"publication_date":"2020-10-12","ids":{"openalex":"https://openalex.org/W3092942657","doi":"https://doi.org/10.1177/1094342020964857","mag":"3092942657"},"language":"en","primary_location":{"id":"doi:10.1177/1094342020964857","is_oa":true,"landing_page_url":"https://doi.org/10.1177/1094342020964857","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/1094342020964857","source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://journals.sagepub.com/doi/pdf/10.1177/1094342020964857","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032637520","display_name":"Bartosz Kohnke","orcid":"https://orcid.org/0000-0002-6000-5490"},"institutions":[{"id":"https://openalex.org/I4210131661","display_name":"Max Planck Institute for Biophysical Chemistry","ror":"https://ror.org/03e76ya46","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210131661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bartosz Kohnke","raw_affiliation_strings":["Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany"],"affiliations":[{"raw_affiliation_string":"Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany","institution_ids":["https://openalex.org/I4210131661"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015041475","display_name":"Carsten Kutzner","orcid":"https://orcid.org/0000-0002-8719-0307"},"institutions":[{"id":"https://openalex.org/I4210131661","display_name":"Max Planck Institute for Biophysical Chemistry","ror":"https://ror.org/03e76ya46","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210131661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Carsten Kutzner","raw_affiliation_strings":["Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany"],"affiliations":[{"raw_affiliation_string":"Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany","institution_ids":["https://openalex.org/I4210131661"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049753003","display_name":"A. Beckmann","orcid":"https://orcid.org/0000-0001-8722-2826"},"institutions":[{"id":"https://openalex.org/I171892758","display_name":"Forschungszentrum J\u00fclich","ror":"https://ror.org/02nv7yv05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I171892758"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andreas Beckmann","raw_affiliation_strings":["J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany"],"affiliations":[{"raw_affiliation_string":"J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany","institution_ids":["https://openalex.org/I171892758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089115290","display_name":"Gert Lube","orcid":"https://orcid.org/0000-0002-5259-5152"},"institutions":[{"id":"https://openalex.org/I74656192","display_name":"University of G\u00f6ttingen","ror":"https://ror.org/01y9bpm73","country_code":"DE","type":"education","lineage":["https://openalex.org/I74656192"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gert Lube","raw_affiliation_strings":["Institute for Numerical and Applied Mathematics, Georg-August University of G\u00f6ttingen, G\u00f6ttingen, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Numerical and Applied Mathematics, Georg-August University of G\u00f6ttingen, G\u00f6ttingen, Germany","institution_ids":["https://openalex.org/I74656192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033031137","display_name":"Ivo Kabadshow","orcid":null},"institutions":[{"id":"https://openalex.org/I171892758","display_name":"Forschungszentrum J\u00fclich","ror":"https://ror.org/02nv7yv05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I171892758"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ivo Kabadshow","raw_affiliation_strings":["J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany"],"affiliations":[{"raw_affiliation_string":"J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany","institution_ids":["https://openalex.org/I171892758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078640630","display_name":"Holger Dachsel","orcid":null},"institutions":[{"id":"https://openalex.org/I171892758","display_name":"Forschungszentrum J\u00fclich","ror":"https://ror.org/02nv7yv05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I171892758"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Dachsel","raw_affiliation_strings":["J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany"],"affiliations":[{"raw_affiliation_string":"J\u00fclich Supercomputing Centre, Forschungszentrum J\u00fclich, J\u00fclich, Germany","institution_ids":["https://openalex.org/I171892758"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045212144","display_name":"Helmut Grubm\u00fcller","orcid":"https://orcid.org/0000-0002-3270-3144"},"institutions":[{"id":"https://openalex.org/I4210131661","display_name":"Max Planck Institute for Biophysical Chemistry","ror":"https://ror.org/03e76ya46","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210131661"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Helmut Grubm\u00fcller","raw_affiliation_strings":["Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany"],"affiliations":[{"raw_affiliation_string":"Theoretical and Computational Biophysics, Max Planck Institute for Biophysical Chemistry, G\u00f6ttingen, Germany","institution_ids":["https://openalex.org/I4210131661"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5045212144"],"corresponding_institution_ids":["https://openalex.org/I4210131661"],"apc_list":null,"apc_paid":null,"fwci":1.7258,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.86558508,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"35","issue":"1","first_page":"97","last_page":"117"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11367","display_name":"Particle accelerators and beam dynamics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11367","display_name":"Particle accelerators and beam dynamics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10559","display_name":"Particle Accelerators and Free-Electron Lasers","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.804963231086731},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7591014504432678},{"id":"https://openalex.org/keywords/fast-multipole-method","display_name":"Fast multipole method","score":0.7039811015129089},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.672602653503418},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5397343039512634},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5156615972518921},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4356372654438019},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33855000138282776},{"id":"https://openalex.org/keywords/multipole-expansion","display_name":"Multipole expansion","score":0.21160674095153809},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10016942024230957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.804963231086731},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7591014504432678},{"id":"https://openalex.org/C135115559","wikidata":"https://www.wikidata.org/wiki/Q5437040","display_name":"Fast multipole method","level":3,"score":0.7039811015129089},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.672602653503418},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5397343039512634},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5156615972518921},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4356372654438019},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33855000138282776},{"id":"https://openalex.org/C52765159","wikidata":"https://www.wikidata.org/wiki/Q1027847","display_name":"Multipole expansion","level":2,"score":0.21160674095153809},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10016942024230957},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1177/1094342020964857","is_oa":true,"landing_page_url":"https://doi.org/10.1177/1094342020964857","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/1094342020964857","source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:publications.goettingen-research-online.de:2/80346","is_oa":true,"landing_page_url":"https://resolver.sub.uni-goettingen.de/purl?gro-2/80346","pdf_url":null,"source":{"id":"https://openalex.org/S4306401634","display_name":"GoeScholar  The Publication Server of the Georg-August-Universit\u00e4t G\u00f6ttingen (Georg-August-Universit\u00e4t G\u00f6ttingen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210122495","host_organization_name":"Asklepios Klinik St. Georg","host_organization_lineage":["https://openalex.org/I4210122495"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal_article"},{"id":"pmh:oai:pure.mpg.de:item_3260951","is_oa":true,"landing_page_url":"http://hdl.handle.net/21.11116/0000-0007-4C69-F","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1177/1094342020964857","is_oa":true,"landing_page_url":"https://doi.org/10.1177/1094342020964857","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/1094342020964857","source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1934065861","display_name":null,"funder_award_id":"software for exascale computing 1648","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3092942657.pdf","grobid_xml":"https://content.openalex.org/works/W3092942657.grobid-xml"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W1031578623","https://openalex.org/W1480785517","https://openalex.org/W1624860839","https://openalex.org/W1672666614","https://openalex.org/W1965060209","https://openalex.org/W1966078827","https://openalex.org/W1966332653","https://openalex.org/W1976821598","https://openalex.org/W1981224307","https://openalex.org/W1985759756","https://openalex.org/W2002555321","https://openalex.org/W2006748749","https://openalex.org/W2017164460","https://openalex.org/W2019504893","https://openalex.org/W2027109368","https://openalex.org/W2028293439","https://openalex.org/W2028479107","https://openalex.org/W2035687084","https://openalex.org/W2039369142","https://openalex.org/W2039425860","https://openalex.org/W2041902442","https://openalex.org/W2046020177","https://openalex.org/W2046266263","https://openalex.org/W2051395078","https://openalex.org/W2054622952","https://openalex.org/W2062505968","https://openalex.org/W2064388657","https://openalex.org/W2069883496","https://openalex.org/W2083206954","https://openalex.org/W2085246296","https://openalex.org/W2086741044","https://openalex.org/W2088294132","https://openalex.org/W2099813373","https://openalex.org/W2117926105","https://openalex.org/W2120891799","https://openalex.org/W2121400889","https://openalex.org/W2132820941","https://openalex.org/W2156871002","https://openalex.org/W2171308769","https://openalex.org/W2206856102","https://openalex.org/W2258884006","https://openalex.org/W2313116527","https://openalex.org/W2334862469","https://openalex.org/W2519009066","https://openalex.org/W2921098500","https://openalex.org/W2953751272","https://openalex.org/W2964186773","https://openalex.org/W2969925465","https://openalex.org/W2995921525","https://openalex.org/W3007069990","https://openalex.org/W3046453755","https://openalex.org/W3094076646","https://openalex.org/W3104531408","https://openalex.org/W3105194265","https://openalex.org/W3138798301","https://openalex.org/W4229754601","https://openalex.org/W4243050008","https://openalex.org/W4251346171"],"related_works":["https://openalex.org/W66594024","https://openalex.org/W2936386759","https://openalex.org/W3189307731","https://openalex.org/W2949962288","https://openalex.org/W2364686214","https://openalex.org/W1428699136","https://openalex.org/W2393707426","https://openalex.org/W4285739865","https://openalex.org/W2085873709","https://openalex.org/W4367553810"],"abstract_inverted_index":{"Solving":[0],"an":[1,24,99,222],"N-body":[2],"problem,":[3],"electrostatic":[4],"or":[5],"gravitational,":[6],"is":[7,23,113,192,198,213],"a":[8,69,193],"crucial":[9],"task":[10],"and":[11,56,122,137,184],"the":[12,29,38,106,116,185,216],"main":[13],"computational":[14,46],"bottleneck":[15],"in":[16,87,98],"many":[17],"scientific":[18],"applications.":[19],"Its":[20],"direct":[21],"solution":[22],"ubiquitous":[25],"showcase":[26],"example":[27],"for":[28,62,74,115,144,159],"compute":[30],"power":[31],"of":[32,187],"graphics":[33],"processing":[34],"units":[35],"(GPUs).":[36],"However,":[37],"na\u00efve":[39],"pairwise":[40],"summation":[41],"has":[42],"[Formula:":[43,59,79],"see":[44,60,80],"text]":[45,61,81],"complexity.":[47],"The":[48,90,163,190,208],"fast":[49],"multipole":[50],"method":[51],"(FMM)":[52],"can":[53],"reduce":[54],"runtime":[55,112],"complexity":[57],"to":[58,95,134,155,179],"any":[63],"specified":[64],"precision.":[65],"Here,":[66],"we":[67],"present":[68],"CUDA-accelerated,":[70],"C++":[71],"FMM":[72,176,212],"implementation":[73,147],"multi":[75],"particle":[76],"systems":[77],"with":[78],"potential":[82],"that":[83],"are":[84],"found,":[85],"e.g.":[86],"biomolecular":[88],"simulations.":[89],"algorithm":[91],"involves":[92],"several":[93],"operators":[94],"exchange":[96],"information":[97],"octree":[100],"data":[101],"structure.":[102],"We":[103,119],"focus":[104],"on":[105],"Multipole-to-Local":[107],"(M2L)":[108],"operator,":[109],"as":[110,221],"its":[111],"limiting":[114],"overall":[117],"performance.":[118],"propose,":[120],"implement":[121],"benchmark":[123],"three":[124],"different":[125],"M2L":[126],"parallelization":[127],"approaches.":[128],"Approach":[129,149],"(1)":[130],"utilizes":[131],"Unified":[132],"Memory":[133],"minimize":[135,180],"programming":[136],"porting":[138],"efforts.":[139],"It":[140,174],"achieves":[141],"decent":[142],"speedups":[143],"only":[145],"little":[146],"work.":[148],"(2)":[150],"employs":[151],"CUDA":[152,210],"Dynamic":[153],"Parallelism":[154],"significantly":[156],"improve":[157],"performance":[158,197],"high":[160],"approximation":[161],"accuracies.":[162],"presorted":[164],"list-based":[165],"approach":[166],"(3)":[167],"fits":[168],"periodic":[169],"boundary":[170],"conditions":[171],"particularly":[172],"well.":[173],"exploits":[175],"operator":[177],"symmetries":[178],"both":[181],"memory":[182,206],"access":[183],"number":[186],"complex":[188],"multiplications.":[189],"result":[191],"compute-bound":[194],"implementation,":[195],"i.e.":[196],"limited":[199],"by":[200,205],"arithmetic":[201],"operations":[202],"rather":[203],"than":[204],"accesses.":[207],"complete":[209],"parallelized":[211],"incorporated":[214],"within":[215],"GROMACS":[217],"molecular":[218],"dynamics":[219],"package":[220],"alternative":[223],"Coulomb":[224],"solver.":[225]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
