{"id":"https://openalex.org/W2140980608","doi":"https://doi.org/10.1145/2304576.2304604","title":"On the communication complexity of 3D FFTs and its implications for Exascale","display_name":"On the communication complexity of 3D FFTs and its implications for Exascale","publication_year":2012,"publication_date":"2012-06-25","ids":{"openalex":"https://openalex.org/W2140980608","doi":"https://doi.org/10.1145/2304576.2304604","mag":"2140980608"},"language":"en","primary_location":{"id":"doi:10.1145/2304576.2304604","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2304576.2304604","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025213872","display_name":"Kenneth Czechowski","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kenneth Czechowski","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015530126","display_name":"Casey Battaglino","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Casey Battaglino","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018427481","display_name":"Chris McClanahan","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chris McClanahan","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027876225","display_name":"Kartik P. Iyer","orcid":"https://orcid.org/0000-0001-7071-9211"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kartik Iyer","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005975375","display_name":"P. K. Yeung","orcid":"https://orcid.org/0000-0002-6278-6032"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"P.-K. Yeung","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016738770","display_name":"Richard Vuduc","orcid":"https://orcid.org/0000-0003-2178-138X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Vuduc","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA","Georgia Institute of Technology Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025213872"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":5.5501,"has_fulltext":false,"cited_by_count":63,"citation_normalized_percentile":{"value":0.9622164,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"205","last_page":"214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8430560827255249},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8167803287506104},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5838766098022461},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5753818154335022},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.5248199105262756},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4959021508693695},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.48827433586120605},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.46154409646987915},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.4573909044265747},{"id":"https://openalex.org/keywords/communication-complexity","display_name":"Communication complexity","score":0.4474928379058838},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4311858117580414},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3586732745170593},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3448406457901001},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2027856707572937},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.18318316340446472},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.12988072633743286},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10596290230751038},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08539021015167236}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8430560827255249},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8167803287506104},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5838766098022461},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5753818154335022},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.5248199105262756},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4959021508693695},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.48827433586120605},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46154409646987915},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.4573909044265747},{"id":"https://openalex.org/C179145077","wikidata":"https://www.wikidata.org/wiki/Q5154130","display_name":"Communication complexity","level":2,"score":0.4474928379058838},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4311858117580414},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3586732745170593},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3448406457901001},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2027856707572937},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.18318316340446472},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.12988072633743286},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10596290230751038},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08539021015167236},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2304576.2304604","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2304576.2304604","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.233.936","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.233.936","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://vuduc.org/pubs/czechowski2012-ics-xfft.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W31232120","https://openalex.org/W47324686","https://openalex.org/W1550054107","https://openalex.org/W1589759851","https://openalex.org/W1800149981","https://openalex.org/W1966416130","https://openalex.org/W1969617278","https://openalex.org/W1972501001","https://openalex.org/W1993834204","https://openalex.org/W1995333355","https://openalex.org/W2000584869","https://openalex.org/W2009516879","https://openalex.org/W2011253351","https://openalex.org/W2015708427","https://openalex.org/W2024639384","https://openalex.org/W2032484463","https://openalex.org/W2038454316","https://openalex.org/W2045579177","https://openalex.org/W2057577013","https://openalex.org/W2070940137","https://openalex.org/W2083221501","https://openalex.org/W2095875205","https://openalex.org/W2105524676","https://openalex.org/W2108896526","https://openalex.org/W2109707129","https://openalex.org/W2113691861","https://openalex.org/W2122814645","https://openalex.org/W2131413854","https://openalex.org/W2135401856","https://openalex.org/W2138690674","https://openalex.org/W2144857621","https://openalex.org/W2146451305","https://openalex.org/W2162186524","https://openalex.org/W2171120359","https://openalex.org/W2188838890","https://openalex.org/W2275977038","https://openalex.org/W3004531541","https://openalex.org/W3139689176","https://openalex.org/W4241101175","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2735130281","https://openalex.org/W1990309876","https://openalex.org/W79990711","https://openalex.org/W4295935130","https://openalex.org/W1480947737","https://openalex.org/W2953368509","https://openalex.org/W4294538768","https://openalex.org/W3010779417","https://openalex.org/W2529649238","https://openalex.org/W1835470271"],"abstract_inverted_index":{"This":[0],"paper":[1],"revisits":[2],"the":[3,72,86,109,120],"communication":[4,36,104],"complexity":[5],"of":[6,75,111,119],"large-scale":[7],"3D":[8],"fast":[9],"Fourier":[10],"transforms":[11],"(FFTs)":[12],"and":[13,34,128],"asks":[14],"what":[15],"impact":[16,74],"trends":[17],"in":[18,117],"current":[19,46,65],"architectures":[20],"will":[21],"have":[22],"on":[23,63],"FFT":[24],"performance":[25,73],"at":[26,60],"exascale.":[27],"We":[28],"analyze":[29],"both":[30],"memory":[31,126],"hierarchy":[32],"traffic":[33],"network":[35,129],"to":[37,53,107],"derive":[38],"suitable":[39],"analytical":[40],"models,":[41],"which":[42],"we":[43,49],"calibrate":[44],"against":[45],"software":[47],"implementations;":[48],"then":[50],"evaluate":[51],"models":[52],"make":[54],"predictions":[55],"about":[56],"potential":[57],"scaling":[58],"outcomes":[59],"exascale,":[61],"based":[62],"extrapolating":[64],"technology":[66],"trends.":[67],"Of":[68],"particular":[69],"interest":[70],"is":[71,99,105],"choosing":[76],"high-density":[77],"processors,":[78],"typified":[79],"today":[80],"by":[81],"graphics":[82],"co-processors":[83],"(GPUs),":[84],"as":[85],"base":[87],"processor":[88],"for":[89],"an":[90],"exascale":[91],"system.":[92],"Among":[93],"various":[94],"observations,":[95],"a":[96,132],"key":[97],"prediction":[98],"that":[100],"although":[101],"inter-node":[102],"all-to-all":[103],"expected":[106],"be":[108],"bottleneck":[110],"distributed":[112],"FFTs,":[113],"intra-node":[114],"communication---expressed":[115],"precisely":[116],"terms":[118],"relative":[121],"balance":[122],"among":[123],"compute":[124],"capacity,":[125],"bandwidth,":[127],"bandwidth---will":[130],"play":[131],"critical":[133],"role.":[134]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
