{"id":"https://openalex.org/W2111946172","doi":"https://doi.org/10.1145/2503210.2503242","title":"Tera-scale 1D FFT with low-communication algorithm and Intel\u00ae Xeon Phi\u2122 coprocessors","display_name":"Tera-scale 1D FFT with low-communication algorithm and Intel\u00ae Xeon Phi\u2122 coprocessors","publication_year":2013,"publication_date":"2013-10-30","ids":{"openalex":"https://openalex.org/W2111946172","doi":"https://doi.org/10.1145/2503210.2503242","mag":"2111946172"},"language":"en","primary_location":{"id":"doi:10.1145/2503210.2503242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2503210.2503242","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101876582","display_name":"Jongsoo Park","orcid":"https://orcid.org/0000-0002-4750-9440"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jongsoo Park","raw_affiliation_strings":["Parallel Computing Lab"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086155148","display_name":"Ganesh Bikshandi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ganesh Bikshandi","raw_affiliation_strings":["Parallel Computing Lab"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110182945","display_name":"Karthikeyan Vaidyanathan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karthikeyan Vaidyanathan","raw_affiliation_strings":["Parallel Computing Lab"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101748090","display_name":"Ping Tang","orcid":"https://orcid.org/0000-0002-8721-4209"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ping Tak Peter Tang","raw_affiliation_strings":["Software and Service Group, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Software and Service Group, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032238070","display_name":"Pradeep Dubey","orcid":"https://orcid.org/0000-0001-5853-0619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pradeep Dubey","raw_affiliation_strings":["Parallel Computing Lab"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100374575","display_name":"Daehyun Kim","orcid":"https://orcid.org/0000-0002-5582-3579"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daehyun Kim","raw_affiliation_strings":["Parallel Computing Lab"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101876582"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.6194,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.96964557,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"19","issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9807999730110168,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.9132840633392334},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8294436931610107},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8152871131896973},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.8068670034408569},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.7655692100524902},{"id":"https://openalex.org/keywords/tera","display_name":"Tera-","score":0.7484934329986572},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.7070642709732056},{"id":"https://openalex.org/keywords/pentium","display_name":"Pentium","score":0.6272923350334167},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5300312638282776},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5172677636146545},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.48431387543678284},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.444180965423584},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34737104177474976},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14478111267089844}],"concepts":[{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.9132840633392334},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8294436931610107},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8152871131896973},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.8068670034408569},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.7655692100524902},{"id":"https://openalex.org/C2775904623","wikidata":"https://www.wikidata.org/wiki/Q108529","display_name":"Tera-","level":2,"score":0.7484934329986572},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.7070642709732056},{"id":"https://openalex.org/C46268123","wikidata":"https://www.wikidata.org/wiki/Q214314","display_name":"Pentium","level":2,"score":0.6272923350334167},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5300312638282776},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5172677636146545},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.48431387543678284},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.444180965423584},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34737104177474976},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14478111267089844},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2503210.2503242","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2503210.2503242","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W176957146","https://openalex.org/W288065879","https://openalex.org/W1543026279","https://openalex.org/W1589759851","https://openalex.org/W1638046464","https://openalex.org/W1967802285","https://openalex.org/W1997451861","https://openalex.org/W2003790405","https://openalex.org/W2032484463","https://openalex.org/W2035975621","https://openalex.org/W2045836768","https://openalex.org/W2061171222","https://openalex.org/W2072897844","https://openalex.org/W2082695854","https://openalex.org/W2085164477","https://openalex.org/W2097982649","https://openalex.org/W2102182691","https://openalex.org/W2104373803","https://openalex.org/W2108157916","https://openalex.org/W2108600626","https://openalex.org/W2108872752","https://openalex.org/W2112482891","https://openalex.org/W2129889687","https://openalex.org/W2134572726","https://openalex.org/W2148053845","https://openalex.org/W2170996201","https://openalex.org/W2620167579","https://openalex.org/W3100417409","https://openalex.org/W4245007865"],"related_works":["https://openalex.org/W2213533160","https://openalex.org/W4252450863","https://openalex.org/W2467043670","https://openalex.org/W187726678","https://openalex.org/W2051078434","https://openalex.org/W2085105049","https://openalex.org/W3203561460","https://openalex.org/W2682544458","https://openalex.org/W3009624197","https://openalex.org/W4251138667"],"abstract_inverted_index":{"This":[0],"paper":[1],"demonstrates":[2],"the":[3,34,72],"first":[4],"tera-scale":[5],"performance":[6,19,27,118],"of":[7,22,42,61,108],"Intel\u00ae":[8,62],"Xeon":[9,43,126],"Phi\u2122":[10],"coprocessors":[11],"on":[12,37,57,125],"1D":[13],"FFT":[14,82,124],"computations.":[15],"Applying":[16],"a":[17,38,58,67,86,132],"disciplined":[18],"programming":[20],"methodology":[21],"sound":[23],"algorithm":[24,111],"choice,":[25],"valid":[26],"model,":[28],"and":[29,45,96,112,138],"well-executed":[30],"optimizations,":[31],"we":[32],"break":[33],"tera-flop":[35],"mark":[36],"mere":[39],"64":[40],"nodes":[41],"Phi":[44],"reach":[46],"6.7":[47],"TFLOPS":[48],"with":[49,91],"512":[50],"nodes,":[51],"which":[52],"is":[53,66,119],"1.5x":[54],"than":[55],"achievable":[56],"same":[59],"number":[60],"Xeon\u00ae":[63],"nodes.":[64],"It":[65],"challenge":[68],"to":[69,122],"fully":[70],"utilize":[71],"compute":[73],"capability":[74],"presented":[75],"by":[76],"many-core":[77],"wide-vector":[78],"processors":[79],"for":[80,116,134,139],"bandwidth-bound":[81,136],"computation.":[83],"We":[84],"leverage":[85],"new":[87],"algorithm,":[88],"Segment-of-Interest":[89],"FFT,":[90],"low":[92,109],"inter-node":[93],"communication":[94,110,146],"cost,":[95],"aggressively":[97],"optimize":[98],"data":[99],"movements":[100],"in":[101],"node-local":[102],"computations,":[103],"exploiting":[104],"caches.":[105],"Our":[106],"coordination":[107],"massively":[113],"parallel":[114],"architecture":[115],"scalable":[117],"not":[120],"limited":[121],"running":[123],"Phi;":[127],"it":[128],"can":[129],"serve":[130],"as":[131],"reference":[133],"other":[135],"computations":[137],"emerging":[140],"HPC":[141],"systems":[142],"that":[143],"are":[144],"increasingly":[145],"limited.":[147]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
