{"id":"https://openalex.org/W2256441331","doi":"https://doi.org/10.1145/2847263.2847266","title":"GPU-Accelerated High-Level Synthesis for Bitwidth Optimization of FPGA Datapaths","display_name":"GPU-Accelerated High-Level Synthesis for Bitwidth Optimization of FPGA Datapaths","publication_year":2016,"publication_date":"2016-02-04","ids":{"openalex":"https://openalex.org/W2256441331","doi":"https://doi.org/10.1145/2847263.2847266","mag":"2256441331"},"language":"en","primary_location":{"id":"doi:10.1145/2847263.2847266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2847263.2847266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015534628","display_name":"Nachiket Kapre","orcid":"https://orcid.org/0000-0002-2187-0406"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Nachiket Kapre","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073681676","display_name":"Deheng Ye","orcid":"https://orcid.org/0000-0002-1754-1837"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Deheng Ye","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5015534628"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":0.946,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.71779199,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"185","last_page":"194"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8372715711593628},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7929503917694092},{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.768811821937561},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.6121470928192139},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5397495031356812},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5252016186714172},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.41970276832580566},{"id":"https://openalex.org/keywords/simulated-annealing","display_name":"Simulated annealing","score":0.41778382658958435},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2642887830734253},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.157161146402359}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8372715711593628},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7929503917694092},{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.768811821937561},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.6121470928192139},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5397495031356812},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5252016186714172},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.41970276832580566},{"id":"https://openalex.org/C126980161","wikidata":"https://www.wikidata.org/wiki/Q863783","display_name":"Simulated annealing","level":2,"score":0.41778382658958435},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2642887830734253},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.157161146402359},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2847263.2847266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2847263.2847266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1507872748","https://openalex.org/W1519244489","https://openalex.org/W1561086741","https://openalex.org/W1994417982","https://openalex.org/W2002649790","https://openalex.org/W2018055497","https://openalex.org/W2030605379","https://openalex.org/W2037139354","https://openalex.org/W2075412885","https://openalex.org/W2076688134","https://openalex.org/W2080794684","https://openalex.org/W2096059450","https://openalex.org/W2101571719","https://openalex.org/W2115762048","https://openalex.org/W2123485201","https://openalex.org/W2131798279","https://openalex.org/W2133990480","https://openalex.org/W2146471606","https://openalex.org/W2151176284","https://openalex.org/W2153185479","https://openalex.org/W2153316377","https://openalex.org/W2159401583","https://openalex.org/W2600258283"],"related_works":["https://openalex.org/W2109699519","https://openalex.org/W2293118914","https://openalex.org/W2006568360","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W102726818","https://openalex.org/W4233616027","https://openalex.org/W2059591361","https://openalex.org/W970262775","https://openalex.org/W2084470113"],"abstract_inverted_index":{"Bitwidth":[0],"optimization":[1,54],"of":[2,14,26,52,80,96,173,176,178,186],"FPGA":[3],"datapaths":[4],"can":[5],"save":[6],"hardware":[7],"resources":[8],"by":[9,58,118],"choosing":[10],"the":[11,49,56,112],"fewest":[12],"number":[13],"bits":[15],"required":[16],"for":[17,77,132,138],"each":[18,120],"datapath":[19],"variable":[20],"to":[21,47,98,103,122,129,147,164],"achieve":[22],"a":[23,60,65,72,123],"desired":[24],"quality":[25,157],"result.":[27],"However,":[28],"it":[29],"is":[30],"an":[31,148],"NP-hard":[32],"problem":[33],"that":[34],"requires":[35],"unacceptably":[36],"long":[37],"runtimes":[38],"when":[39,141,180],"using":[40,85],"sequential":[41],"CPU-based":[42],"heuristics.":[43],"We":[44,70,126],"show":[45],"how":[46],"parallelize":[48],"key":[50],"steps":[51],"bitwidth":[53,108,114,139],"on":[55],"GPU":[57,124,145],"performing":[59],"fast":[61],"brute-force":[62],"search":[63,68],"over":[64],"carefully":[66],"constrained":[67],"space.":[69],"develop":[71],"high-level":[73,183],"synthesis":[74],"methodology":[75],"suitable":[76],"rapid":[78],"prototyping":[79],"bitwidth-annotated":[81],"RTL":[82,167],"code":[83],"generation":[84],"gcc's":[86],"GIMPLE":[87],"backend.":[88],"For":[89,107],"range":[90,133],"analysis,":[91],"we":[92,110],"perform":[93],"parallel":[94,117],"evaluation":[95],"sub-intervals":[97],"provide":[99],"tighter":[100],"bounds":[101],"compared":[102],"ordinary":[104],"interval":[105],"arithmetic.":[106],"allocation,":[109],"enumerate":[111],"different":[113],"combinations":[115],"in":[116,171],"assigning":[119],"combination":[121],"thread.":[125],"demonstrate":[127],"up":[128],"10?1000x":[130],"speedups":[131,137],"analysis":[134],"and":[135],"50?200x":[136],"allocation":[140],"comparing":[142],"NVIDIA":[143],"K20":[144],"implementation":[146],"Intel":[149],"Core":[150],"i5-4570":[151],"CPU":[152],"while":[153],"maintaining":[154],"identical":[155],"solution":[156],"across":[158],"various":[159],"benchmarks.":[160],"This":[161],"allows":[162],"us":[163],"generate":[165],"tailor-made":[166],"with":[168],"minimum":[169],"bitwidths":[170],"hundreds":[172,177],"milliseconds":[174],"instead":[175],"minutes":[179],"starting":[181],"from":[182],"C":[184],"descriptions":[185],"dataflow":[187],"computations.":[188]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
