{"id":"https://openalex.org/W2139205226","doi":"https://doi.org/10.1109/ipdps.2010.5470415","title":"Optimizing and tuning the fast multipole method for state-of-the-art multicore architectures","display_name":"Optimizing and tuning the fast multipole method for state-of-the-art multicore architectures","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2139205226","doi":"https://doi.org/10.1109/ipdps.2010.5470415","mag":"2139205226"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2010.5470415","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470415","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://escholarship.org/content/qt9m9634ss/qt9m9634ss.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074121652","display_name":"Aparna Chandramowlishwaran","orcid":"https://orcid.org/0000-0003-0840-4192"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aparna Chandramowlishwaran","raw_affiliation_strings":["CRD, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"CRD, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]},{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102746800","display_name":"Samuel Williams","orcid":"https://orcid.org/0000-0002-8327-5717"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Williams","raw_affiliation_strings":["CRD, Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"CRD, Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113842004","display_name":"Leonid Oliker","orcid":"https://orcid.org/0000-0002-7923-2896"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonid Oliker","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011362871","display_name":"Ilya Lashuk","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ilya Lashuk","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044137409","display_name":"George Biros","orcid":"https://orcid.org/0000-0002-0033-3994"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Biros","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016738770","display_name":"Richard Vuduc","orcid":"https://orcid.org/0000-0003-2178-138X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Vuduc","raw_affiliation_strings":["College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074121652"],"corresponding_institution_ids":["https://openalex.org/I130701444","https://openalex.org/I148283060"],"apc_list":null,"apc_paid":null,"fwci":6.7261,"has_fulltext":true,"cited_by_count":52,"citation_normalized_percentile":{"value":0.97565527,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11263","display_name":"Electromagnetic Simulation and Numerical Methods","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11383","display_name":"Advanced Antenna and Metasurface Technologies","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.7718871831893921},{"id":"https://openalex.org/keywords/multipole-expansion","display_name":"Multipole expansion","score":0.7616641521453857},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.743145227432251},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6648737788200378},{"id":"https://openalex.org/keywords/fast-multipole-method","display_name":"Fast multipole method","score":0.5405610203742981},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41358792781829834},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.38815295696258545},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18478232622146606},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08215945959091187}],"concepts":[{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.7718871831893921},{"id":"https://openalex.org/C52765159","wikidata":"https://www.wikidata.org/wiki/Q1027847","display_name":"Multipole expansion","level":2,"score":0.7616641521453857},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.743145227432251},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6648737788200378},{"id":"https://openalex.org/C135115559","wikidata":"https://www.wikidata.org/wiki/Q5437040","display_name":"Fast multipole method","level":3,"score":0.5405610203742981},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41358792781829834},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.38815295696258545},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18478232622146606},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08215945959091187},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ipdps.2010.5470415","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470415","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt9m9634ss","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/9m9634ss","pdf_url":"https://escholarship.org/content/qt9m9634ss/qt9m9634ss.pdf","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:qt9m9634ss","is_oa":false,"landing_page_url":"http://www.escholarship.org/uc/item/9m9634ss","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Chandramowlishwaran, A; Williams, S; Oliker, L; Lashuk, I; Biros, G; &amp; Vuduc, R. (2010). Optimizing and tuning the fast multipole method for state-of-the-art multicore architectures. Proceedings of the 2010 IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2010. doi: 10.1109/IPDPS.2010.5470415. UC Irvine: Retrieved from: http://www.escholarship.org/uc/item/9m9634ss","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:escholarship.org:ark:/13030/qt9m9634ss","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/9m9634ss","pdf_url":"https://escholarship.org/content/qt9m9634ss/qt9m9634ss.pdf","source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.7699999809265137}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309321","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2139205226.pdf","grobid_xml":"https://content.openalex.org/works/W2139205226.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W2048007344","https://openalex.org/W2050662034","https://openalex.org/W2054622952","https://openalex.org/W2058042523","https://openalex.org/W2078313079","https://openalex.org/W2083206954","https://openalex.org/W2089686040","https://openalex.org/W2099813373","https://openalex.org/W2102182691","https://openalex.org/W2103241610","https://openalex.org/W2117926105","https://openalex.org/W2130014733","https://openalex.org/W2136770272","https://openalex.org/W2141170493","https://openalex.org/W2165765261","https://openalex.org/W2167173222","https://openalex.org/W2171296521","https://openalex.org/W3041169337","https://openalex.org/W3141032579"],"related_works":["https://openalex.org/W2952819168","https://openalex.org/W1822333417","https://openalex.org/W2158582466","https://openalex.org/W2076771790","https://openalex.org/W3105194265","https://openalex.org/W599820626","https://openalex.org/W2125081029","https://openalex.org/W47721382","https://openalex.org/W9152652","https://openalex.org/W4385486246"],"abstract_inverted_index":{"This":[0],"work":[1],"presents":[2],"the":[3,15,97],"first":[4],"extensive":[5],"study":[6],"of":[7,14],"single-node":[8],"performance":[9,31,59,107],"optimization,":[10],"tuning,":[11,35],"and":[12,27,43,54,71,93,108],"analysis":[13],"fast":[16],"multipole":[17],"method":[18],"(FMM)":[19],"on":[20,62,67,73,78],"modern":[21],"multi-core":[22],"systems.":[23],"We":[24,81],"consider":[25],"single-":[26],"double-precision":[28,58],"with":[29,111],"numerous":[30,48],"enhancements,":[32],"including":[33],"low-level":[34],"numerical":[36],"approximation,":[37],"data":[38],"structure":[39],"transformations,":[40],"OpenMP":[41],"parallelization,":[42],"algorithmic":[44],"tuning.":[45],"Among":[46],"our":[47,84,88],"findings,":[49],"we":[50],"show":[51],"that":[52,96],"optimization":[53],"parallelization":[55],"can":[56],"improve":[57],"by":[60],"25\u00d7":[61],"Intel's":[63],"quad-core":[64,69],"Nehalem,":[65],"9.4\u00d7":[66],"AMD's":[68],"Barcelona,":[70],"37.6\u00d7":[72],"Sun's":[74],"Victoria":[75],"Falls":[76],"(dual-sockets":[77],"all":[79],"systems).":[80],"also":[82],"compare":[83],"single-precision":[85],"version":[86],"against":[87],"prior":[89],"state-of-the-art":[90],"GPU-based":[91],"code":[92],"show,":[94],"surprisingly,":[95],"most":[98,113],"advanced":[99,114],"multicore":[100],"architecture":[101],"(Nehalem)":[102],"reaches":[103],"parity":[104],"in":[105],"both":[106],"power":[109],"efficiency":[110],"NVIDIA's":[112],"GPU":[115],"architecture.":[116]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":13}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
