{"id":"https://openalex.org/W3006884211","doi":"https://doi.org/10.1145/3373087.3375341","title":"Performance Evaluation and Power Analysis of Teraflop-scale Fluid Simulation with Stratix 10 FPGA","display_name":"Performance Evaluation and Power Analysis of Teraflop-scale Fluid Simulation with Stratix 10 FPGA","publication_year":2020,"publication_date":"2020-02-23","ids":{"openalex":"https://openalex.org/W3006884211","doi":"https://doi.org/10.1145/3373087.3375341","mag":"3006884211"},"language":"en","primary_location":{"id":"doi:10.1145/3373087.3375341","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3373087.3375341","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002541691","display_name":"Atsushi Koshiba","orcid":"https://orcid.org/0000-0001-5439-4357"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Atsushi Koshiba","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113825702","display_name":"Kouki Watanabe","orcid":null},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]},{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kouki Watanabe","raw_affiliation_strings":["RIKEN Center for Computational Science, Tohoku University, Sendai, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Tohoku University, Sendai, Japan","institution_ids":["https://openalex.org/I201537933","https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022019173","display_name":"Takaaki Miyajima","orcid":"https://orcid.org/0000-0001-9409-0913"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takaaki Miyajima","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081752237","display_name":"Kentaro Sano","orcid":"https://orcid.org/0000-0002-6681-4192"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kentaro Sano","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002541691"],"corresponding_institution_ids":["https://openalex.org/I4210129730"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02526132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"321","last_page":"321"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stratix","display_name":"Stratix","score":0.9624174237251282},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7845844626426697},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7828807830810547},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6872352957725525},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6823443174362183},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.6667392253875732},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5719926357269287},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5493577718734741},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4860425591468811},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45153504610061646},{"id":"https://openalex.org/keywords/clock-rate","display_name":"Clock rate","score":0.4429289698600769},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.34093421697616577},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33794355392456055},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19805359840393066},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.12209904193878174}],"concepts":[{"id":"https://openalex.org/C2776277307","wikidata":"https://www.wikidata.org/wiki/Q22074755","display_name":"Stratix","level":3,"score":0.9624174237251282},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7845844626426697},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7828807830810547},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6872352957725525},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6823443174362183},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.6667392253875732},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5719926357269287},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5493577718734741},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4860425591468811},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45153504610061646},{"id":"https://openalex.org/C178693496","wikidata":"https://www.wikidata.org/wiki/Q911691","display_name":"Clock rate","level":3,"score":0.4429289698600769},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.34093421697616577},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33794355392456055},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19805359840393066},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.12209904193878174},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3373087.3375341","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3373087.3375341","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1855765675","https://openalex.org/W3046859795","https://openalex.org/W1505902692","https://openalex.org/W111037196","https://openalex.org/W2560894929","https://openalex.org/W2472667575","https://openalex.org/W1861262881","https://openalex.org/W2156524298","https://openalex.org/W2106942255","https://openalex.org/W3010779629"],"abstract_inverted_index":{"Stream":[0],"computing":[1],"is":[2,85,212],"a":[3,73,88,114,190,194,199],"suitable":[4],"approach":[5],"to":[6,91,159,182],"improve":[7,92,175],"both":[8,120],"performance":[9,21,134],"and":[10,24,33,84,141,168,208,214],"power":[11,136],"efficiency":[12,137],"of":[13,38,45,76,81,162,171],"numerical":[14],"computations":[15],"with":[16,50,87,119,229,236],"FPGAs.":[17],"To":[18,100,174],"achieve":[19],"further":[20],"gain,":[22],"temporal":[23],"spatial":[25],"parallelism":[26,46,121],"were":[27,47],"exploited:":[28],"the":[29,34,66,93,102,147,169,176,180,184,201,218,226],"first":[30],"one":[31],"deepens":[32],"latter":[35],"duplicates":[36],"pipelines":[37],"streamed":[39,115],"computation":[40],"cores.":[41,173],"These":[42],"two":[43],"types":[44,122],"previously":[48],"evaluated":[49,129],"Arria":[51,82],"10":[52,231],"FPGA.":[53],"However,":[54],"it":[55,130,241],"has":[56,72],"not":[57],"been":[58],"verified":[59],"if":[60],"they":[61],"are":[62],"also":[63],"effective":[64],"for":[65,104,123,242],"latest":[67],"FPGA,":[68],"Stratix":[69,124,230],"10,":[70],"which":[71,165,211],"larger":[74],"amount":[75],"logic":[77],"elements":[78],"(i.e.,":[79,97],"2.4X":[80],"10)":[83],"equipped":[86],"new":[89],"feature":[90],"maximum":[94,142],"clock":[95,143],"frequency":[96,144],"HyperFlex":[98,243],"architecture).":[99],"show":[101],"scalability":[103],"such":[105],"state-of-the-art":[106],"FPGAs,":[107],"in":[108,193],"this":[109,152],"paper,":[110],"we":[111,149,178,223],"firstly":[112],"implemented":[113],"fluid":[116,203,227],"simulation":[117,204,228],"accelerator":[118],"10.":[125],"We":[126],"then":[127],"thoroughly":[128],"by":[131,188,239],"obtaining":[132],"computational":[133],"(FLOPS),":[135],"(FLOPS/W),":[138],"resource":[139],"utilization,":[140],"(Fmax).":[145],"From":[146],"results,":[148],"found":[150],"that":[151,225],"implementation":[153,181,235],"excessively":[154],"used":[155],"DSP":[156,185,196],"blocks":[157],"due":[158],"inefficient":[160],"mapping":[161],"floating-point":[163],"operations,":[164],"reduced":[166],"Fmax":[167],"number":[170],"pipelined":[172],"scalability,":[177],"optimized":[179,202],"reduce":[183],"block":[186],"usage":[187],"utilizing":[189],"Multiply-Add":[191],"function":[192],"single":[195],"block.":[197],"As":[198],"result,":[200],"achieves":[205],"1.06":[206],"TFLOPS":[207],"12.6":[209],"GFLOPS/W,":[210],"1.36X":[213],"1.24X":[215],"higher":[216],"than":[217],"non-optimized":[219],"version,":[220],"respectively.":[221],"Moreover,":[222],"estimate":[224],"could":[232],"outperform":[233],"GPU-based":[234],"Tesla":[237],"V100":[238],"optimizing":[240],"architecture.":[244]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
