{"id":"https://openalex.org/W2785874054","doi":"https://doi.org/10.1145/3174243.3174248","title":"Combined Spatial and Temporal Blocking for High-Performance Stencil Computation on FPGAs Using OpenCL","display_name":"Combined Spatial and Temporal Blocking for High-Performance Stencil Computation on FPGAs Using OpenCL","publication_year":2018,"publication_date":"2018-02-15","ids":{"openalex":"https://openalex.org/W2785874054","doi":"https://doi.org/10.1145/3174243.3174248","mag":"2785874054"},"language":"en","primary_location":{"id":"doi:10.1145/3174243.3174248","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3174243.3174248","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3174248&type=pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=3174248&type=pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hamid Reza Zohouri","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hamid Reza Zohouri","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Artur Podobas","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Artur Podobas","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":null,"display_name":"Satoshi Matsuoka","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Matsuoka","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I114531698"],"apc_list":null,"apc_paid":null,"fwci":13.3993,"has_fulltext":true,"cited_by_count":66,"citation_normalized_percentile":{"value":0.9928456,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"153","last_page":"162"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9732999801635742},{"id":"https://openalex.org/keywords/stratix","display_name":"Stratix","score":0.8055999875068665},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6862000226974487},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6104999780654907},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.535099983215332},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4641000032424927},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4429999887943268},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.3481000065803528}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9732999801635742},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8238999843597412},{"id":"https://openalex.org/C2776277307","wikidata":"https://www.wikidata.org/wiki/Q22074755","display_name":"Stratix","level":3,"score":0.8055999875068665},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6862000226974487},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6104999780654907},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5860000252723694},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.535099983215332},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4641000032424927},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4507000148296356},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4429999887943268},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3903999924659729},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.37459999322891235},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3255999982357025},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.31690001487731934},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.27390000224113464},{"id":"https://openalex.org/C142962650","wikidata":"https://www.wikidata.org/wiki/Q240838","display_name":"Reconfigurable computing","level":3,"score":0.27390000224113464},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C149810388","wikidata":"https://www.wikidata.org/wiki/Q5374873","display_name":"Emulation","level":2,"score":0.2676999866962433}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3174243.3174248","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3174243.3174248","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3174248&type=pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1802.00438","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1802.00438","pdf_url":"https://arxiv.org/pdf/1802.00438","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3174243.3174248","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3174243.3174248","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3174248&type=pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2792802287","display_name":null,"funder_award_id":"KAKENHI","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G3282004645","display_name":null,"funder_award_id":"JPMJCR","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G3450459968","display_name":null,"funder_award_id":"JPMJCR1303","funder_id":"https://openalex.org/F4320338075","funder_display_name":"Core Research for Evolutional Science and Technology"},{"id":"https://openalex.org/G6718509927","display_name":null,"funder_award_id":"CREST","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7256519854","display_name":null,"funder_award_id":"JST-CREST","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7879866177","display_name":null,"funder_award_id":"CREST","funder_id":"https://openalex.org/F4320320912","funder_display_name":"Ministry of Education, Culture, Sports, Science and Technology"},{"id":"https://openalex.org/G7952859799","display_name":"Accelerating High-Performance Computing Application Kernels Through Reconfigurable Hardware","funder_award_id":"16F16764","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320320912","display_name":"Ministry of Education, Culture, Sports, Science and Technology","ror":"https://ror.org/048rj2z13"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2785874054.pdf","grobid_xml":"https://content.openalex.org/works/W2785874054.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1964192298","https://openalex.org/W2002862759","https://openalex.org/W2023143645","https://openalex.org/W2037274813","https://openalex.org/W2039378765","https://openalex.org/W2080592089","https://openalex.org/W2221279259","https://openalex.org/W2296730406","https://openalex.org/W2491444920","https://openalex.org/W2529513625","https://openalex.org/W2583383421","https://openalex.org/W2604282092","https://openalex.org/W2626476365"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"developments":[1],"in":[2,31],"High":[3],"Level":[4],"Synthesis":[5],"tools":[6],"have":[7],"attracted":[8],"software":[9],"programmers":[10],"to":[11,52,84,94,116,138,176],"accelerate":[12],"their":[13],"high-performance":[14],"computing":[15],"applications":[16],"on":[17],"FPGAs.":[18],"Even":[19],"though":[20],"it":[21],"has":[22],"been":[23],"shown":[24],"that":[25,70,164],"FPGAs":[26],"can":[27,135,170],"compete":[28],"with":[29],"GPUs":[30],"terms":[32],"of":[33,143,156,174],"performance":[34,73,110,118,155,173],"for":[35,68,119,146,182],"stencil":[36,62,186],"computation,":[37,187],"most":[38],"previous":[39],"work":[40,58],"achieve":[41,171],"this":[42,57],"by":[43,108],"avoiding":[44],"spatial":[45,80],"blocking":[46,83],"and":[47,81,89,140,148,179,184],"restricting":[48],"input":[49,86],"dimensions":[50],"relative":[51],"FPGA":[53,66],"on-chip":[54],"memory.":[55],"In":[56],"we":[59,113,162],"create":[60],"a":[61,157,172],"accelerator":[63,134],"using":[64],"Intel":[65,122],"SDK":[67],"OpenCL":[69],"achieves":[71],"high":[72],"without":[74],"having":[75],"such":[76],"restrictions.":[77],"We":[78],"combine":[79],"temporal":[82],"avoid":[85],"size":[87],"restrictions,":[88],"employ":[90],"multiple":[91],"FPGA-specific":[92],"optimizations":[93],"tackle":[95],"issues":[96],"arisen":[97],"from":[98],"the":[99,120,154,165],"added":[100],"design":[101],"complexity.":[102],"Accelerator":[103],"parameter":[104],"tuning":[105],"is":[106],"guided":[107],"our":[109,133],"model,":[111],"which":[112,152],"also":[114],"use":[115],"project":[117],"upcoming":[121,166],"Stratix":[123,167],"10":[124,129,168],"devices.":[125],"On":[126],"an":[127],"Arria":[128],"GX":[130],"1150":[131],"device,":[132],"reach":[136],"up":[137,175],"760":[139],"375":[141],"GFLOP/s":[142],"compute":[144],"performance,":[145],"2D":[147,183],"3D":[149,185],"stencils,":[150],"respectively,":[151],"rivals":[153],"highly-optimized":[158],"GPU":[159],"implementation.":[160],"Furthermore,":[161],"estimate":[163],"devices":[169],"3.5":[177],"TFLOP/s":[178,181],"1.6":[180],"respectively.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2018-02-23T00:00:00"}
