{"id":"https://openalex.org/W3145767355","doi":"https://doi.org/10.1109/sc.2008.5213210","title":"Bandwidth intensive 3-D FFT kernel for GPUs using CUDA","display_name":"Bandwidth intensive 3-D FFT kernel for GPUs using CUDA","publication_year":2008,"publication_date":"2008-11-01","ids":{"openalex":"https://openalex.org/W3145767355","doi":"https://doi.org/10.1109/sc.2008.5213210","mag":"3145767355"},"language":"en","primary_location":{"id":"doi:10.1109/sc.2008.5213210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2008.5213210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006972521","display_name":"Akira Nukada","orcid":"https://orcid.org/0000-0001-7959-6975"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"A. Nukada","raw_affiliation_strings":["Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","Tokyo Institute of Technology, Meguro, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110211416","display_name":"Yasuhiko Ogata","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Y. Ogata","raw_affiliation_strings":["Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","Tokyo Institute of Technology, Meguro, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011254074","display_name":"Toshio Endo","orcid":"https://orcid.org/0000-0001-7297-6211"},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"T. Endo","raw_affiliation_strings":["Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","Tokyo Institute of Technology, Meguro, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103252086","display_name":"Satoshi Matsuoka","orcid":"https://orcid.org/0000-0003-2126-2926"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]},{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"S. Matsuoka","raw_affiliation_strings":["Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","National Institute of Information, Chiyoda, Tokyo, Japan","Tokyo Institute of Technology, Meguro, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Kawaguchi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"National Institute of Information, Chiyoda, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006972521"],"corresponding_institution_ids":["https://openalex.org/I114531698","https://openalex.org/I4210086780"],"apc_list":null,"apc_paid":null,"fwci":10.3983,"has_fulltext":false,"cited_by_count":69,"citation_normalized_percentile":{"value":0.98504207,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8508840799331665},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7344043850898743},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7223011255264282},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6977720260620117},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.6085540652275085},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.5812453031539917},{"id":"https://openalex.org/keywords/high-memory","display_name":"High memory","score":0.5658606290817261},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5531408786773682},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.5092204809188843},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4938238561153412},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.1489105224609375},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12213963270187378},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09476035833358765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8508840799331665},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7344043850898743},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7223011255264282},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6977720260620117},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.6085540652275085},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.5812453031539917},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.5658606290817261},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5531408786773682},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.5092204809188843},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4938238561153412},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.1489105224609375},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12213963270187378},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09476035833358765},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sc.2008.5213210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sc.2008.5213210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320308943","display_name":"Microsoft Research","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320323954","display_name":"University of Tsukuba","ror":"https://ror.org/02956yf07"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W123564486","https://openalex.org/W2003324319","https://openalex.org/W2027926931","https://openalex.org/W2032309817","https://openalex.org/W2036148351","https://openalex.org/W2058645184","https://openalex.org/W2108792719","https://openalex.org/W2127091535","https://openalex.org/W2127391977","https://openalex.org/W2133712572","https://openalex.org/W2134572726","https://openalex.org/W2135401856","https://openalex.org/W2163229756","https://openalex.org/W2171473263","https://openalex.org/W2323594350","https://openalex.org/W4241667468","https://openalex.org/W6604995708","https://openalex.org/W6654554885","https://openalex.org/W6684857869"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825"],"abstract_inverted_index":{"Most":[0],"GPU":[1,77,148],"performance":[2,119],"\u201chypes\u201d":[3],"have":[4,32],"focused":[5],"around":[6],"tightly-coupled":[7],"applications":[8,110],"with":[9,134],"small":[10],"memory":[11,25,87,103],"bandwidth":[12,123],"requirements":[13],"e.g.,":[14],"N-body,":[15],"but":[16],"GPUs":[17,65],"are":[18,71],"also":[19],"commodity":[20],"vector":[21],"machines":[22],"sporting":[23],"substantial":[24],"bandwidth;":[26],"however,":[27],"effective":[28],"programming":[29,69],"methodologies":[30],"thereof":[31],"been":[33],"poorly":[34],"studied.":[35],"Our":[36,105],"new":[37],"3-D":[38],"FFT":[39,62],"kernel,":[40],"written":[41],"in":[42,116],"NVIDIA":[43],"CUDA,":[44],"achieves":[45,111],"nearly":[46],"80":[47],"GFLOPS":[48],"on":[49,64],"a":[50],"top-end":[51],"GPU,":[52],"being":[53,144],"more":[54],"than":[55,59],"three":[56],"times":[57],"faster":[58,147],"any":[60],"existing":[61],"implementations":[63],"including":[66,84],"CUFFT.":[67],"Careful":[68],"techniques":[70],"employed":[72],"to":[73,108],"fully":[74],"exploit":[75],"modern":[76],"hardware":[78],"characteristics":[79],"while":[80,141],"overcoming":[81],"their":[82],"limitations,":[83],"on-chip":[85],"shared":[86],"utilization,":[88],"optimizing":[89],"the":[90,139],"number":[91],"of":[92,113,146],"threads":[93],"and":[94,99],"registers":[95],"through":[96],"appropriate":[97],"localization,":[98],"avoiding":[100],"low-speed":[101],"stride":[102],"accesses.":[104],"kernel":[106],"applied":[107],"real":[109],"orders":[112],"magnitude":[114],"boost":[115],"power&cost":[117],"vs.":[118],"metrics.":[120],"The":[121],"off-card":[122],"limitation":[124],"is":[125],"still":[126],"an":[127],"issue,":[128],"which":[129],"could":[130],"be":[131],"alleviated":[132],"somewhat":[133],"application":[135],"kernels":[136],"confinement":[137],"within":[138],"card,":[140],"ideal":[142],"solution":[143],"facilitation":[145],"interfaces.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":13},{"year":2012,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
