{"id":"https://openalex.org/W2894590528","doi":"https://doi.org/10.1145/3241793.3241800","title":"FPGA HPC using OpenCL","display_name":"FPGA HPC using OpenCL","publication_year":2018,"publication_date":"2018-06-20","ids":{"openalex":"https://openalex.org/W2894590528","doi":"https://doi.org/10.1145/3241793.3241800","mag":"2894590528"},"language":"en","primary_location":{"id":"doi:10.1145/3241793.3241800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3241793.3241800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3241793.3241800","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3241793.3241800","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034744458","display_name":"Ahmed Sanaullah","orcid":"https://orcid.org/0000-0002-7169-480X"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ahmed Sanaullah","raw_affiliation_strings":["Department of Electrical and Computer Engineering; Boston University, Boston, MA","Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering; Boston University, Boston, MA","institution_ids":["https://openalex.org/I111088046"]},{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021051610","display_name":"Martin Herbordt","orcid":"https://orcid.org/0000-0002-3443-9113"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin C. Herbordt","raw_affiliation_strings":["Department of Electrical and Computer Engineering; Boston University, Boston, MA","Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering; Boston University, Boston, MA","institution_ids":["https://openalex.org/I111088046"]},{"raw_affiliation_string":"Boston University, Boston, MA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034744458"],"corresponding_institution_ids":["https://openalex.org/I111088046"],"apc_list":null,"apc_paid":null,"fwci":3.6829,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.93873518,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8458526730537415},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.8331244587898254},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7261836528778076},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7060089111328125},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6119274497032166},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5515605211257935},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.5289114713668823},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4820578992366791},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.43419045209884644},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.40878164768218994},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21995100378990173},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11760792136192322}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8458526730537415},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.8331244587898254},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7261836528778076},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7060089111328125},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6119274497032166},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5515605211257935},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.5289114713668823},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4820578992366791},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.43419045209884644},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.40878164768218994},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21995100378990173},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11760792136192322},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3241793.3241800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3241793.3241800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3241793.3241800","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3241793.3241800","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3241793.3241800","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3241793.3241800","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2810384083","display_name":"SHF: Small: Collaborative Research: Coupling Computation and Communication in FPGA-Enhanced Clouds and Clusters","funder_award_id":"1618303","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4980082827","display_name":"II-EN:  Collaborative Research:  Large-Scale FPGA-Centric Cluster with Direct and Programmable Communication","funder_award_id":"1405695","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7041837209","display_name":null,"funder_award_id":"CNS-1405695 , CCF- 1618303/7960","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8709736269","display_name":null,"funder_award_id":"CNS-1405695","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2894590528.pdf","grobid_xml":"https://content.openalex.org/works/W2894590528.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1805979977","https://openalex.org/W1972620714","https://openalex.org/W2032592607","https://openalex.org/W2086364561","https://openalex.org/W2095874892","https://openalex.org/W2098800684","https://openalex.org/W2118341874","https://openalex.org/W2149507607","https://openalex.org/W2182577486","https://openalex.org/W2184164993","https://openalex.org/W2359097465","https://openalex.org/W2472500612","https://openalex.org/W2535599028","https://openalex.org/W2599597882","https://openalex.org/W2726677039","https://openalex.org/W2752315335","https://openalex.org/W2762515459","https://openalex.org/W2766784169","https://openalex.org/W2906549808","https://openalex.org/W6643337361","https://openalex.org/W6686061617","https://openalex.org/W6726357735","https://openalex.org/W6729404843"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2027972911","https://openalex.org/W2146343568","https://openalex.org/W2013643406","https://openalex.org/W2157978810","https://openalex.org/W2778498407","https://openalex.org/W3012895752"],"abstract_inverted_index":{"FPGAs":[0],"have":[1,35,90],"typically":[2],"achieved":[3],"high":[4],"speedups":[5],"for":[6,27,41,69,128,187,198],"3D":[7,151],"Fast":[8],"Fourier":[9],"Transforms":[10],"(FFTs)":[11],"due":[12,45,73],"to":[13,46,74,118,154],"the":[14,43,47,129,136,140,164],"presence":[15],"of":[16,49,76,109,172,208],"hard":[17],"floating":[18],"point":[19],"units,":[20],"low":[21],"latency":[22],"specialized":[23],"pipelines,":[24],"and":[25,52,64,146,180,190,202,212],"support":[26],"complex":[28],"connectivity":[29],"among":[30],"processing":[31],"elements.":[32],"Previous":[33],"implementations":[34,186],"relied":[36],"on":[37,163],"FFT":[38,71,120,152,185],"IP":[39,59,124,183,218],"cores":[40],"performing":[42],"computation":[44],"complexity":[48],"manually":[50],"developing":[51],"maintaining/upgrading":[53],"efficient":[54],"pipelines":[55,121,197],"in":[56,97],"HDL.":[57],"These":[58],"cores,":[60],"however,":[61],"are":[62],"bulky":[63],"cannot":[65],"be":[66,116,144],"fully":[67],"tuned":[68],"specific":[70],"sizes":[72],"use":[75,205],"generic":[77],"architectures.":[78],"HLS":[79],"tools,":[80],"such":[81],"as":[82],"OpenCL,":[83],"offer":[84],"a":[85,107],"more":[86],"customizable":[87],"alternative":[88],"but":[89],"suffered":[91],"from":[92],"worse":[93],"performance":[94],"than":[95,216],"HDL":[96,137],"previous":[98],"work.":[99],"In":[100],"this":[101],"paper":[102],"we":[103],"show":[104,134],"that,":[105],"using":[106],"set":[108],"code":[110],"structure":[111],"optimizations,":[112],"OpenCL":[113,141,194],"designs":[114,127],"can":[115,143],"compiled":[117],"Radix-2":[119],"which":[122],"outperform":[123],"core":[125,219],"based":[126],"same":[130],"throughput.":[131],"We":[132],"further":[133],"that":[135],"generated":[138,195],"by":[139],"compiler":[142],"isolated":[145],"seamlessly":[147],"integrated":[148],"into":[149],"existing":[150],"shells":[153],"reduce":[155],"implementation":[156],"effort.":[157],"Our":[158],"single":[159],"device":[160],"design,":[161],"tested":[162],"Altera":[165],"Arria10X115":[166],"FPGA,":[167],"achieves":[168],"an":[169,206],"average":[170,207],"speedup":[171],"29x":[173],"vs":[174,177,182],"CPU-MKL,":[175],"4.1x":[176],"GPU":[178],"cuFFT":[179],"1.1x":[181],"Core":[184],"163,":[188,200],"323":[189,201],"643":[191,203],"FFTs.":[192],"Moreover,":[193],"compute":[196],"83,":[199],"FFTs":[204],"7.5x":[209],"fewer":[210,214],"ALMs":[211],"1.6x":[213],"DSPs":[215],"corresponding":[217],"versions.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":4}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
