{"id":"https://openalex.org/W2558453873","doi":"https://doi.org/10.1109/hpec.2016.7761606","title":"On-chip memory efficient data layout for 2D FFT on 3D memory integrated FPGA","display_name":"On-chip memory efficient data layout for 2D FFT on 3D memory integrated FPGA","publication_year":2016,"publication_date":"2016-09-01","ids":{"openalex":"https://openalex.org/W2558453873","doi":"https://doi.org/10.1109/hpec.2016.7761606","mag":"2558453873"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2016.7761606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2016.7761606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000109853","display_name":"Shreyas G. Singapura","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shreyas G. Singapura","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042560222","display_name":"Rajgopal Kannan","orcid":"https://orcid.org/0000-0001-8736-3012"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajgopal Kannan","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033166029","display_name":"Viktor K. Prasanna","orcid":"https://orcid.org/0000-0002-1609-8589"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor K. Prasanna","raw_affiliation_strings":["Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"Ming Hsieh Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5000109853"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":0.946,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.74976491,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"34","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8146226406097412},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.7740435600280762},{"id":"https://openalex.org/keywords/registered-memory","display_name":"Registered memory","score":0.5910530686378479},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5575827360153198},{"id":"https://openalex.org/keywords/flat-memory-model","display_name":"Flat memory model","score":0.5536473989486694},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.5497735142707825},{"id":"https://openalex.org/keywords/interleaved-memory","display_name":"Interleaved memory","score":0.5460044741630554},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5348922610282898},{"id":"https://openalex.org/keywords/memory-refresh","display_name":"Memory refresh","score":0.5053580403327942},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.49466273188591003},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.47180238366127014},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4643780291080475},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.45372796058654785},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4534880518913269},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.44392716884613037},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.4383623003959656},{"id":"https://openalex.org/keywords/high-memory","display_name":"High memory","score":0.43758314847946167},{"id":"https://openalex.org/keywords/physical-address","display_name":"Physical address","score":0.4212706983089447},{"id":"https://openalex.org/keywords/computer-memory","display_name":"Computer memory","score":0.3083259165287018},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12354797124862671},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10447454452514648}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8146226406097412},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.7740435600280762},{"id":"https://openalex.org/C93446704","wikidata":"https://www.wikidata.org/wiki/Q449328","display_name":"Registered memory","level":3,"score":0.5910530686378479},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5575827360153198},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.5536473989486694},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.5497735142707825},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.5460044741630554},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5348922610282898},{"id":"https://openalex.org/C87907426","wikidata":"https://www.wikidata.org/wiki/Q6815755","display_name":"Memory refresh","level":4,"score":0.5053580403327942},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.49466273188591003},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.47180238366127014},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4643780291080475},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.45372796058654785},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4534880518913269},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.44392716884613037},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.4383623003959656},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.43758314847946167},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.4212706983089447},{"id":"https://openalex.org/C92855701","wikidata":"https://www.wikidata.org/wiki/Q5830907","display_name":"Computer memory","level":3,"score":0.3083259165287018},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12354797124862671},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10447454452514648},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2016.7761606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2016.7761606","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1851318392","https://openalex.org/W2002831810","https://openalex.org/W2013435193","https://openalex.org/W2020141429","https://openalex.org/W2025265972","https://openalex.org/W2028987224","https://openalex.org/W2034861439","https://openalex.org/W2061171222","https://openalex.org/W2097772361","https://openalex.org/W2128414660","https://openalex.org/W2150909864","https://openalex.org/W2153215457","https://openalex.org/W2168579934","https://openalex.org/W2223585171","https://openalex.org/W2250217037","https://openalex.org/W2293206841","https://openalex.org/W3146763006","https://openalex.org/W6655086935","https://openalex.org/W6656820944","https://openalex.org/W6659004162","https://openalex.org/W6682164510"],"related_works":["https://openalex.org/W2491097902","https://openalex.org/W1554378476","https://openalex.org/W321331545","https://openalex.org/W2354036839","https://openalex.org/W2117116543","https://openalex.org/W2138825797","https://openalex.org/W2005166481","https://openalex.org/W1993089791","https://openalex.org/W2782503170","https://openalex.org/W2558453873"],"abstract_inverted_index":{"3D":[0,25,80,102,133,161,234],"memories":[1,26],"are":[2,104],"becoming":[3],"viable":[4],"solutions":[5],"for":[6,36,76,118,148,188],"the":[7,13,50,54,61,66,141,170,174,189,198,228],"memory":[8,17,55,81,88,103,134,138,162,187,194,199],"wall":[9],"problem":[10,154,191],"and":[11,109,123,144,156,177,193],"meeting":[12],"bandwidth":[14,22,147,200],"requirements":[15],"of":[16,53,65,172,181],"intensive":[18],"applications.":[19,38],"The":[20,128],"high":[21],"provided":[23],"by":[24],"does":[27],"not":[28],"translate":[29],"to":[30,101,111,115,139,220],"a":[31,57,73,160,203],"proportional":[32],"increase":[33],"in":[34,159,206,223],"performance":[35],"all":[37],"For":[39],"an":[40,149],"application":[41],"such":[42],"as":[43,90,92],"2D":[44,77,182,230],"FFT":[45,78,153,176,179,183,231],"with":[46,184,211,227],"strided":[47,124,142],"access":[48],"patterns,":[49],"data":[51,74,95,167,216],"layout":[52,75,96,168,217],"has":[56],"significant":[58],"impact":[59],"on":[60,79,132,233],"total":[62],"execution":[63,224],"time":[64,225],"implementation.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71],"present":[72],"integrated":[82],"FPGA":[83],"that":[84,98],"is":[85],"both":[86,119,173],"on-chip":[87,137,186,207,213],"efficient":[89],"well":[91],"throughput-optimal.":[93],"Our":[94,165],"ensures":[97],"consecutive":[99],"accesses":[100,143],"sufficiently":[105],"interleaved":[106],"among":[107],"layers":[108],"vaults":[110],"absorb":[112],"latency":[113],"due":[114],"activation":[116],"overheads":[117],"sequential":[120],"(Row":[121],"FFT)":[122,126],"(Column":[125],"accesses.":[127],"current":[129],"state-of-the-art":[130],"implementation":[131,232],"requires":[135],"O(\u221acN)":[136],"reduce":[140],"achieve":[145],"maximum":[146],"N":[150,152],"\u00d7":[151],"size":[155,192],"c":[157],"columns":[158],"bank":[163],"row.":[164],"proposed":[166],"optimizes":[169],"throughput":[171],"Row":[175],"Column":[178],"phases":[180],"O(N)":[185],"same":[190],"parameters":[195],"without":[196],"decreasing":[197],"thereby":[201],"achieving":[202],"\u221ac\u00d7":[204],"reduction":[205],"memory.":[208,235],"On":[209],"architectures":[210],"limited":[212],"memory,":[214],"our":[215],"achieves":[218],"2\u00d7":[219],"4\u00d7":[221],"improvement":[222],"compared":[226],"state-of-art":[229]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
