{"id":"https://openalex.org/W2911554528","doi":"https://doi.org/10.1109/reconfig.2018.8641693","title":"Evaluating Floating-point Intensive Applications on OpenCL FPGA Platforms: A Case Study on the SimpleMOC Kernel","display_name":"Evaluating Floating-point Intensive Applications on OpenCL FPGA Platforms: A Case Study on the SimpleMOC Kernel","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2911554528","doi":"https://doi.org/10.1109/reconfig.2018.8641693","mag":"2911554528"},"language":"en","primary_location":{"id":"doi:10.1109/reconfig.2018.8641693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/reconfig.2018.8641693","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Conference on ReConFigurable Computing and FPGAs (ReConFig)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101790023","display_name":"Zheming Jin","orcid":"https://orcid.org/0000-0002-7197-780X"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zheming Jin","raw_affiliation_strings":["Leadership Computing Facility, Argonne National Laboratory, Argonne, IL, USA"],"affiliations":[{"raw_affiliation_string":"Leadership Computing Facility, Argonne National Laboratory, Argonne, IL, USA","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050512119","display_name":"Hal Finkel","orcid":"https://orcid.org/0000-0002-7551-7122"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hal Finkel","raw_affiliation_strings":["Leadership Computing Facility, Argonne National Laboratory, Argonne, IL, USA"],"affiliations":[{"raw_affiliation_string":"Leadership Computing Facility, Argonne National Laboratory, Argonne, IL, USA","institution_ids":["https://openalex.org/I1282105669"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101790023"],"corresponding_institution_ids":["https://openalex.org/I1282105669"],"apc_list":null,"apc_paid":null,"fwci":0.2525,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54351014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8333621025085449},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7400606274604797},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6848604083061218},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6094791889190674},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.49449488520622253},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.48846498131752014},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.4858253002166748},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.48042941093444824},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4223957359790802},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4107290506362915},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2442813217639923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8333621025085449},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7400606274604797},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6848604083061218},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6094791889190674},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.49449488520622253},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.48846498131752014},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.4858253002166748},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.48042941093444824},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4223957359790802},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4107290506362915},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2442813217639923},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/reconfig.2018.8641693","is_oa":false,"landing_page_url":"https://doi.org/10.1109/reconfig.2018.8641693","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Conference on ReConFigurable Computing and FPGAs (ReConFig)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320338284","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1585509108","https://openalex.org/W1971013940","https://openalex.org/W1984222112","https://openalex.org/W2021295531","https://openalex.org/W2043957526","https://openalex.org/W2143909327","https://openalex.org/W2146611296","https://openalex.org/W2176747730","https://openalex.org/W2251532325","https://openalex.org/W2333869848","https://openalex.org/W2343695530","https://openalex.org/W2365763746","https://openalex.org/W2475663704","https://openalex.org/W2480421838","https://openalex.org/W2523475814","https://openalex.org/W2584732844","https://openalex.org/W2741403979","https://openalex.org/W2746871167","https://openalex.org/W2766677789","https://openalex.org/W2786263629","https://openalex.org/W4234381228","https://openalex.org/W4242577057","https://openalex.org/W6635276414","https://openalex.org/W6664035182","https://openalex.org/W6707928459","https://openalex.org/W6745663435","https://openalex.org/W6748392146"],"related_works":["https://openalex.org/W2085105049","https://openalex.org/W2370279919","https://openalex.org/W2152301037","https://openalex.org/W2503163466","https://openalex.org/W2027246841","https://openalex.org/W3203561460","https://openalex.org/W2094210932","https://openalex.org/W1836109266","https://openalex.org/W4251138667","https://openalex.org/W2384717171"],"abstract_inverted_index":{"FPGAs":[0,44],"are":[1,18],"becoming":[2],"a":[3,7,35,52,61,130],"promising":[4],"choice":[5],"as":[6,28,60],"heterogeneous":[8],"computing":[9,13],"component":[10],"for":[11,32,45,74],"scientific":[12],"when":[14],"floating-point":[15,75,89],"optimized":[16,86],"architectures":[17],"added":[19],"to":[20,39,64,115],"current":[21],"FPGAs.":[22],"The":[23,152,167,186],"high-level":[24],"synthesis":[25],"tools":[26],"such":[27],"Intel":[29,103,142],"FPGA":[30,73,105,172,192],"SDK":[31],"OpenCL":[33,81,118],"provide":[34],"streamlined":[36],"design":[37],"flow":[38],"facilitate":[40],"the":[41,57,66,80,84,116,121,126,137,159,165,171,181,191,199,207],"use":[42],"of":[43,70,83,120,132],"researchers.":[46],"In":[47],"this":[48],"paper,":[49],"we":[50],"choose":[51],"nuclear":[53],"reactor":[54],"simulation":[55],"application,":[56],"SimpleMOC":[58],"kernel,":[59,122],"case":[62],"study":[63],"evaluate":[65,108,136],"potential":[67],"and":[68,98,107,111,146,161,175,183,201],"effectiveness":[69],"using":[71],"an":[72,102,141,147],"intensive":[76],"applications.":[77],"We":[78,134],"describe":[79],"implementations":[82],"kernel":[85,96,127,138],"with":[87],"low-latency":[88],"operators,":[90],"on-chip":[91],"memory":[92],"accesses,":[93],"loop":[94],"transformations,":[95],"vectorization,":[97],"compute-unit":[99],"duplication":[100],"on":[101,140,170,180,190,198,206],"Arria10-based":[104],"platform,":[106],"their":[109],"performance":[110,128,187],"resource":[112],"utilizations.":[113],"Compared":[114],"baseline":[117],"implementation":[119],"our":[123],"optimizations":[124],"improve":[125],"by":[129],"factor":[131],"102.":[133],"also":[135],"application":[139],"Xeon":[143],"16-core":[144],"CPU":[145,160],"Nvidia":[148],"Tesla":[149],"K80":[150],"GPU.":[151,208],"GPU":[153,182],"is":[154,173,193],"approximately":[155],"2X":[156],"faster":[157,163],"than":[158,164,178,196,204],"7.5X":[162],"FPGA.":[166],"power":[168],"consumption":[169],"4.5X":[174],"6.4X":[176],"lower":[177,203],"that":[179,197,205],"CPU,":[184,200],"respectively.":[185],"per":[188],"watt":[189],"1.74X":[194],"higher":[195],"1.65X":[202]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
