{"id":"https://openalex.org/W3198636154","doi":"https://doi.org/10.1109/hpec49654.2021.9622811","title":"A More Portable HeFFTe: Implementing a Fallback Algorithm for Scalable Fourier Transforms","display_name":"A More Portable HeFFTe: Implementing a Fallback Algorithm for Scalable Fourier Transforms","publication_year":2021,"publication_date":"2021-09-20","ids":{"openalex":"https://openalex.org/W3198636154","doi":"https://doi.org/10.1109/hpec49654.2021.9622811","mag":"3198636154"},"language":"en","primary_location":{"id":"doi:10.1109/hpec49654.2021.9622811","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec49654.2021.9622811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047481769","display_name":"Daniel Sharp","orcid":"https://orcid.org/0000-0002-0439-5084"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]},{"id":"https://openalex.org/I4388482696","display_name":"Naval Research Laboratory Information Technology Division","ror":"https://ror.org/04xfp8b22","country_code":null,"type":"facility","lineage":["https://openalex.org/I1288214837","https://openalex.org/I1330347796","https://openalex.org/I175003984","https://openalex.org/I3130687028","https://openalex.org/I4388482696"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel Sharp","raw_affiliation_strings":["Center for Computational Science & Engineering, Massachussetts Institute of Technology, Cambridge, Massachussetts"],"affiliations":[{"raw_affiliation_string":"Center for Computational Science & Engineering, Massachussetts Institute of Technology, Cambridge, Massachussetts","institution_ids":["https://openalex.org/I63966007","https://openalex.org/I4388482696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074244476","display_name":"Miroslav Stoyanov","orcid":"https://orcid.org/0000-0002-8199-5577"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]},{"id":"https://openalex.org/I4388482696","display_name":"Naval Research Laboratory Information Technology Division","ror":"https://ror.org/04xfp8b22","country_code":null,"type":"facility","lineage":["https://openalex.org/I1288214837","https://openalex.org/I1330347796","https://openalex.org/I175003984","https://openalex.org/I3130687028","https://openalex.org/I4388482696"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Miroslav Stoyanov","raw_affiliation_strings":["Center for Computational Science & Engineering, Massachussetts Institute of Technology, Cambridge, Massachussetts"],"affiliations":[{"raw_affiliation_string":"Center for Computational Science & Engineering, Massachussetts Institute of Technology, Cambridge, Massachussetts","institution_ids":["https://openalex.org/I63966007","https://openalex.org/I4388482696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083604741","display_name":"Stanimire Tomov","orcid":"https://orcid.org/0000-0002-5937-7959"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stanimire Tomov","raw_affiliation_strings":["University of Tennessee, Knoxville, TN"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["University of Tennessee, Knoxville, TN"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047481769"],"corresponding_institution_ids":["https://openalex.org/I4388482696","https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.9211,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72704323,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"56","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7333685159683228},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6596769094467163},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5576004385948181},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.5285286903381348},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4701027274131775},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.4594252109527588},{"id":"https://openalex.org/keywords/algorithm-design","display_name":"Algorithm design","score":0.4242100119590759},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.375224769115448},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3507685959339142},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.34165048599243164},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1650724709033966},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11436781287193298}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7333685159683228},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6596769094467163},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5576004385948181},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.5285286903381348},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4701027274131775},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.4594252109527588},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.4242100119590759},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.375224769115448},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3507685959339142},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.34165048599243164},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1650724709033966},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11436781287193298},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec49654.2021.9622811","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec49654.2021.9622811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1966904279","https://openalex.org/W2000531806","https://openalex.org/W2001984859","https://openalex.org/W2020141429","https://openalex.org/W2022028092","https://openalex.org/W2061171222","https://openalex.org/W2102182691","https://openalex.org/W2608012057","https://openalex.org/W2972517649","https://openalex.org/W3036255981","https://openalex.org/W3149448910","https://openalex.org/W4404988331","https://openalex.org/W6767607904","https://openalex.org/W6874584847","https://openalex.org/W6940596342"],"related_works":["https://openalex.org/W4327521644","https://openalex.org/W2978884468","https://openalex.org/W3132558499","https://openalex.org/W2005846134","https://openalex.org/W2168413811","https://openalex.org/W2369237035","https://openalex.org/W2948041274","https://openalex.org/W4226515754","https://openalex.org/W4367355863","https://openalex.org/W2132996594"],"abstract_inverted_index":{"The":[0,134,204],"Highly":[1],"Efficient":[2],"Fast":[3],"Fourier":[4],"Transform":[5],"for":[6,44,126,183],"Exascale":[7],"(heFFTe)":[8],"numerical":[9],"library":[10,27],"is":[11,53,175],"a":[12,39,64,70,116,122,169,188],"C++":[13],"implementation":[14],"of":[15,41,63,106,136],"distributed":[16],"multidimensional":[17],"FFTs":[18],"targeting":[19],"heterogeneous":[20],"and":[21,81,113,121,156,187,241],"scalable":[22],"systems.":[23],"To":[24],"date,":[25],"the":[26,45,61,77,110,184,225],"has":[28],"relied":[29],"on":[30,109],"users":[31],"to":[32,67,79,103,132,159,180,210,238],"provide":[33],"at":[34],"least":[35],"one":[36],"installation":[37],"from":[38],"selection":[40],"well-known":[42],"libraries":[43],"single":[46],"node/MPI-rank":[47],"one-dimensional":[48],"FFT":[49,130,163,202],"calculations":[50],"that":[51,88,174,190,215],"heFFTe":[52,68,83],"built":[54],"on.":[55],"In":[56,165],"this":[57,98,137],"paper,":[58],"we":[59,167],"describe":[60],"development":[62],"CPU-based":[65,162],"backend":[66,100,138],"as":[69,233,235],"reference,":[71],"or":[72,93,198,229],"\"stock\",":[73],"implementation.":[74],"This":[75],"allows":[76],"user":[78],"install":[80],"run":[82],"without":[84],"any":[85],"external":[86],"dependencies":[87],"may":[89,216],"include":[90],"restrictive":[91],"licensing":[92],"mandate":[94],"specific":[95,129],"hardware.":[96],"Furthermore,":[97],"stock":[99],"was":[101],"implemented":[102],"take":[104],"advantage":[105],"SIMD":[107],"capabilities":[108],"modern":[111],"CPU,":[112],"includes":[114],"both":[115,154],"custom":[117],"vectorized":[118,142],"complex":[119,185],"data-type":[120],"run-time":[123],"generated":[124],"call-graph":[125],"selecting":[127],"which":[128],"algorithm":[131],"call.":[133],"performance":[135,155],"greatly":[139],"increases":[140],"when":[141,147,194],"instructions":[143],"are":[144],"available":[145,220],"and,":[146],"vectorized,":[148],"it":[149],"provides":[150],"reasonable":[151],"scalability":[152,189,193],"in":[153,221],"accuracy":[157],"compared":[158,179],"an":[160],"alternative":[161],"backend.":[164],"particular,":[166],"illustrate":[168],"highly-performant":[170],"$\\mathcal{O}(N\\log":[171,242],"N)$":[172,243],"code":[173,182],"about":[176],"10\u00d7":[177],"faster":[178],"non-vectorized":[181],"arithmetic,":[186],"matches":[191],"heFFTe\u2019s":[192],"used":[195,209],"with":[196],"vendor":[197,222],"other":[199,212],"highly-optimized":[200],"1D":[201],"backends.":[203],"same":[205],"technology":[206],"can":[207],"be":[208,217],"derive":[211],"Fourier-related":[213],"transformations":[214],"even":[218],"not":[219],"libraries,":[223],"e.g.,":[224],"discrete":[226],"sine":[227],"(DST)":[228],"cosine":[230],"(DCT)":[231],"transforms,":[232],"well":[234],"their":[236],"extension":[237],"multiple":[239],"dimensions":[240],"timing.":[244]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
