{"id":"https://openalex.org/W2080185015","doi":"https://doi.org/10.1109/hpec.2013.6670352","title":"Re-Introduction of communication-avoiding FMM-accelerated FFTs with GPU acceleration","display_name":"Re-Introduction of communication-avoiding FMM-accelerated FFTs with GPU acceleration","publication_year":2013,"publication_date":"2013-09-01","ids":{"openalex":"https://openalex.org/W2080185015","doi":"https://doi.org/10.1109/hpec.2013.6670352","mag":"2080185015"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2013.6670352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2013.6670352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/3431226","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011836461","display_name":"M. Harper Langston","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102519","display_name":"Reservoir Labs (United States)","ror":"https://ror.org/01a3m3f05","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102519"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"M. Harper Langston","raw_affiliation_strings":["Reservoir Labs Inc., New York, NY","Reservoir Labs. Inc. New York, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Reservoir Labs Inc., New York, NY","institution_ids":["https://openalex.org/I4210102519"]},{"raw_affiliation_string":"Reservoir Labs. Inc. New York, New York, NY, USA","institution_ids":["https://openalex.org/I4210102519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104040289","display_name":"Muthu Manikandan Baskaran","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102519","display_name":"Reservoir Labs (United States)","ror":"https://ror.org/01a3m3f05","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102519"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Muthu Baskaran","raw_affiliation_strings":["Reservoir Labs Inc., New York, NY","Reservoir Labs. Inc. New York, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Reservoir Labs Inc., New York, NY","institution_ids":["https://openalex.org/I4210102519"]},{"raw_affiliation_string":"Reservoir Labs. Inc. New York, New York, NY, USA","institution_ids":["https://openalex.org/I4210102519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047696430","display_name":"Beno\u00eet Meister","orcid":"https://orcid.org/0000-0003-1975-1376"},"institutions":[{"id":"https://openalex.org/I4210102519","display_name":"Reservoir Labs (United States)","ror":"https://ror.org/01a3m3f05","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102519"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benoit Meister","raw_affiliation_strings":["Reservoir Labs Inc., New York, NY","Reservoir Labs. Inc. New York, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Reservoir Labs Inc., New York, NY","institution_ids":["https://openalex.org/I4210102519"]},{"raw_affiliation_string":"Reservoir Labs. Inc. New York, New York, NY, USA","institution_ids":["https://openalex.org/I4210102519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017292577","display_name":"Nicolas Vasilache","orcid":"https://orcid.org/0000-0002-4096-3325"},"institutions":[{"id":"https://openalex.org/I4210102519","display_name":"Reservoir Labs (United States)","ror":"https://ror.org/01a3m3f05","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102519"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicolas Vasilache","raw_affiliation_strings":["Reservoir Labs Inc., New York, NY","Reservoir Labs. Inc. New York, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Reservoir Labs Inc., New York, NY","institution_ids":["https://openalex.org/I4210102519"]},{"raw_affiliation_string":"Reservoir Labs. Inc. New York, New York, NY, USA","institution_ids":["https://openalex.org/I4210102519"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108503417","display_name":"Richard Lethin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102519","display_name":"Reservoir Labs (United States)","ror":"https://ror.org/01a3m3f05","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102519"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Lethin","raw_affiliation_strings":["Reservoir Labs Inc., New York, NY","Reservoir Labs. Inc. New York, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Reservoir Labs Inc., New York, NY","institution_ids":["https://openalex.org/I4210102519"]},{"raw_affiliation_string":"Reservoir Labs. Inc. New York, New York, NY, USA","institution_ids":["https://openalex.org/I4210102519"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5011836461"],"corresponding_institution_ids":["https://openalex.org/I4210102519"],"apc_list":null,"apc_paid":null,"fwci":0.4408,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.66313187,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"33","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.8110575675964355},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7686914801597595},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6062237024307251},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5949507355690002},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5908229947090149},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.584992527961731},{"id":"https://openalex.org/keywords/communications-system","display_name":"Communications system","score":0.4679526090621948},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.4657479226589203},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.44763362407684326},{"id":"https://openalex.org/keywords/fast-multipole-method","display_name":"Fast multipole method","score":0.444446325302124},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.418817400932312},{"id":"https://openalex.org/keywords/multipole-expansion","display_name":"Multipole expansion","score":0.3962942659854889},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.3756686747074127},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.34467262029647827},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33720844984054565},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1917111575603485},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11533913016319275}],"concepts":[{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.8110575675964355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7686914801597595},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6062237024307251},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5949507355690002},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5908229947090149},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.584992527961731},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.4679526090621948},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.4657479226589203},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.44763362407684326},{"id":"https://openalex.org/C135115559","wikidata":"https://www.wikidata.org/wiki/Q5437040","display_name":"Fast multipole method","level":3,"score":0.444446325302124},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.418817400932312},{"id":"https://openalex.org/C52765159","wikidata":"https://www.wikidata.org/wiki/Q1027847","display_name":"Multipole expansion","level":2,"score":0.3962942659854889},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.3756686747074127},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34467262029647827},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33720844984054565},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1917111575603485},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11533913016319275},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/hpec.2013.6670352","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2013.6670352","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},{"id":"pmh:oai:zenodo.org:3431226","is_oa":true,"landing_page_url":"https://zenodo.org/record/3431226","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:3431226","is_oa":true,"landing_page_url":"https://zenodo.org/record/3431226","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W93506392","https://openalex.org/W1585137498","https://openalex.org/W1630643170","https://openalex.org/W1997494092","https://openalex.org/W2005998530","https://openalex.org/W2010122118","https://openalex.org/W2011253351","https://openalex.org/W2061171222","https://openalex.org/W2072897844","https://openalex.org/W2083206954","https://openalex.org/W2096070062","https://openalex.org/W2099813373","https://openalex.org/W2102182691","https://openalex.org/W2104373803","https://openalex.org/W2106805276","https://openalex.org/W2125473407","https://openalex.org/W2134572726","https://openalex.org/W2254254903","https://openalex.org/W3100417409","https://openalex.org/W4253609027","https://openalex.org/W6603799187","https://openalex.org/W6609812263","https://openalex.org/W6635156405","https://openalex.org/W6636696507","https://openalex.org/W6668478505","https://openalex.org/W6691950125"],"related_works":["https://openalex.org/W2952819168","https://openalex.org/W1822333417","https://openalex.org/W2158582466","https://openalex.org/W2076771790","https://openalex.org/W3105194265","https://openalex.org/W599820626","https://openalex.org/W2125081029","https://openalex.org/W47721382","https://openalex.org/W9152652","https://openalex.org/W4385486246"],"abstract_inverted_index":{"As":[0,25],"distributed":[1],"memory":[2],"systems":[3],"grow":[4],"larger,":[5],"communication":[6,21,118,165,178],"demands":[7],"have":[8,23,85],"increased.":[9],"Unfortunately,":[10],"while":[11],"the":[12,40,62,70,94,99,105,109,113,121,129,133,152,156],"costs":[13,22],"of":[14,39,61,104,112,124,136,155],"arithmetic":[15],"operations":[16],"continue":[17],"to":[18,66,101,108,115,127],"decrease":[19],"rapidly,":[20],"not.":[24],"a":[26,31,159,170],"result,":[27],"there":[28],"has":[29,57],"been":[30],"growing":[32],"interest":[33],"in":[34,43,80],"communication-avoiding":[35,47],"algorithms":[36],"for":[37,93,132,150,166,173],"some":[38],"classic":[41],"problems":[42],"numerical":[44],"computing,":[45],"including":[46],"Fast":[48,71],"Fourier":[49],"Transforms":[50],"(FFTs).":[51],"A":[52],"previously-developed":[53],"low-communication":[54],"FFT,":[55,96],"however,":[56],"remained":[58],"largely":[59],"out":[60],"picture,":[63],"partially":[64],"due":[65],"its":[67],"reliance":[68],"on":[69,163],"Multipole":[72],"Method":[73],"(FMM),":[74],"an":[75,87],"algorithm":[76,143],"that":[77],"typically":[78],"aids":[79],"accelerating":[81],"dense":[82,174],"computations.":[83],"We":[84,140],"begun":[86],"algorithmic":[88],"investigation":[89],"and":[90,119,169],"re-implementation":[91],"design":[92],"FMM-accelerated":[95],"which":[97,125],"exploits":[98],"ability":[100],"tune":[102],"precision":[103],"result":[106],"(due":[107],"mathematical":[110],"nature":[111],"FMM)":[114],"reduce":[116,128],"power-burning":[117],"computation,":[120],"potential":[122],"benefit":[123],"is":[126],"energy":[130],"required":[131],"fundamental":[134],"transform":[135],"digital":[137],"signal":[138],"processing.":[139],"reintroduce":[141],"this":[142],"as":[144,146],"well":[145],"discuss":[147],"new":[148],"innovations":[149],"separating":[151],"distinct":[153],"portions":[154],"FMM":[157],"into":[158],"CPU-dedicated":[160],"process,":[161],"relying":[162],"inter-processor":[164],"approximate":[167],"interactions,":[168],"GPU-dedicated":[171],"process":[172],"interactions":[175],"with":[176],"no":[177],"<sup":[179],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[180],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[181],".":[182]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
