{"id":"https://openalex.org/W2045856677","doi":"https://doi.org/10.1109/aspdac.2013.6509613","title":"High-level synthesis of multiple dependent CUDA kernels on FPGA","display_name":"High-level synthesis of multiple dependent CUDA kernels on FPGA","publication_year":2013,"publication_date":"2013-01-01","ids":{"openalex":"https://openalex.org/W2045856677","doi":"https://doi.org/10.1109/aspdac.2013.6509613","mag":"2045856677"},"language":"en","primary_location":{"id":"doi:10.1109/aspdac.2013.6509613","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aspdac.2013.6509613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 18th Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026163196","display_name":"Swathi Gurumani","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108443","display_name":"Advanced Digital Sciences Center","ror":"https://ror.org/01xaqx887","country_code":"SG","type":"facility","lineage":["https://openalex.org/I4210108443"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"S. T. Gurumani","raw_affiliation_strings":["Advanced Digital Sciences Center, Singapore","Adv. Digital Sci. Center, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Sciences Center, Singapore","institution_ids":["https://openalex.org/I4210108443"]},{"raw_affiliation_string":"Adv. Digital Sci. Center, Singapore, Singapore","institution_ids":["https://openalex.org/I4210108443"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009362997","display_name":"Hisham Cholakkal","orcid":"https://orcid.org/0000-0002-8230-9065"},"institutions":[{"id":"https://openalex.org/I4210108443","display_name":"Advanced Digital Sciences Center","ror":"https://ror.org/01xaqx887","country_code":"SG","type":"facility","lineage":["https://openalex.org/I4210108443"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"H. Cholakkal","raw_affiliation_strings":["Advanced Digital Sciences Center, Singapore","Adv. Digital Sci. Center, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Sciences Center, Singapore","institution_ids":["https://openalex.org/I4210108443"]},{"raw_affiliation_string":"Adv. Digital Sci. Center, Singapore, Singapore","institution_ids":["https://openalex.org/I4210108443"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100604862","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0003-0799-0054"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Center for Energy-Efficient Computing and Applications, School of EECS, Peking University, China","Center for Energy-efficient Comput. & Applic., Peking Univ., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Energy-Efficient Computing and Applications, School of EECS, Peking University, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Center for Energy-efficient Comput. & Applic., Peking Univ., Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090843153","display_name":"Kyle Rupnow","orcid":"https://orcid.org/0000-0003-2908-2225"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"K. Rupnow","raw_affiliation_strings":["Nanyang Technological University, Singapore","Nanyang Tech. Univ., Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Nanyang Tech. Univ., Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056321228","display_name":"Deming Chen","orcid":"https://orcid.org/0000-0002-3016-0270"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Deming Chen","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, USA","University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois at Urbana/Champaign, Urbana, IL, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5026163196"],"corresponding_institution_ids":["https://openalex.org/I4210108443"],"apc_list":null,"apc_paid":null,"fwci":5.0434,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.95295063,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"305","last_page":"312"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8609829545021057},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.857388973236084},{"id":"https://openalex.org/keywords/high-level-synthesis","display_name":"High-level synthesis","score":0.7097968459129333},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6509072780609131},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.6376136541366577},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6068873405456543},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5962267518043518},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.512162446975708},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4702146351337433},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.46950650215148926},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.35989058017730713},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.32696467638015747},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.13962823152542114},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1172235906124115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8609829545021057},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.857388973236084},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.7097968459129333},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6509072780609131},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.6376136541366577},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6068873405456543},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5962267518043518},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.512162446975708},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4702146351337433},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.46950650215148926},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.35989058017730713},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.32696467638015747},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.13962823152542114},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1172235906124115},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aspdac.2013.6509613","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aspdac.2013.6509613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 18th Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W118713324","https://openalex.org/W153755465","https://openalex.org/W1502558230","https://openalex.org/W1982052956","https://openalex.org/W2000921084","https://openalex.org/W2018055497","https://openalex.org/W2049600101","https://openalex.org/W2054625910","https://openalex.org/W2092698325","https://openalex.org/W2103798154","https://openalex.org/W2108019747","https://openalex.org/W2118124464","https://openalex.org/W2131355383","https://openalex.org/W2141597697","https://openalex.org/W2149576406","https://openalex.org/W2160323811","https://openalex.org/W2168113051","https://openalex.org/W2191327475","https://openalex.org/W2832956274","https://openalex.org/W4251523731","https://openalex.org/W6604839928","https://openalex.org/W6683503243"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825","https://openalex.org/W2893308117"],"abstract_inverted_index":{"High-level":[0],"synthesis":[1,205],"(HLS)":[2],"tools":[3,33],"provide":[4],"automatic":[5],"generation":[6],"of":[7,25,38,98,109,152,159,169,206],"hardware":[8],"at":[9],"the":[10,125,150,179,189,198,204],"register":[11],"transfer":[12],"level":[13],"(RTL)":[14],"from":[15],"algorithm":[16],"descriptions":[17],"written":[18],"in":[19,44,69,127,201],"high-level":[20],"languages,":[21,40],"enabling":[22],"faster":[23],"creation":[24],"custom":[26],"accelerators":[27],"for":[28,93,116,143],"FPGA":[29],"architectures.":[30],"Existing":[31],"HLS":[32,168],"support":[34],"a":[35,95,140,156,160],"wide":[36],"variety":[37,97],"input":[39,87],"and":[41,50,66,74,148],"assist":[42],"users":[43],"design":[45,57,106,131],"space":[46,58,107,132],"exploration":[47,59,80,108,133],"through":[48,155],"automation":[49],"feedback":[51],"on":[52,192],"designs'":[53],"performance":[54,176],"bottlenecks.":[55],"This":[56,164],"applies":[60],"techniques":[61],"such":[62],"as":[63],"pipelining,":[64],"partitioning":[65],"resource":[67,75],"sharing":[68],"order":[70],"to":[71,134],"improve":[72],"performance,":[73],"utilization.":[76],"However,":[77],"although":[78],"automated":[79,105,130],"can":[81,174],"find":[82],"some":[83],"inherent":[84],"parallelism,":[85],"data-parallel":[86],"source":[88,114],"code":[89,115],"is":[90],"still":[91],"superior":[92],"exposing":[94],"greater":[96],"parallelism.":[99],"In":[100,120],"prior":[101],"work,":[102],"we":[103,123,196],"demonstrated":[104],"GPU":[110,180],"multi-threaded":[111],"(CUDA)":[112],"language":[113],"efficient":[117],"RTL":[118],"generation.":[119],"this":[121,129,153],"paper,":[122],"examine":[124],"challenges":[126,200],"extending":[128],"multiple":[135,170],"dependent":[136,171],"CUDA":[137,172,208],"kernels,":[138],"demonstrate":[139,149],"step-by-step":[141],"procedure":[142],"efficiently":[144],"performing":[145],"multi-kernel":[146,207],"synthesis,":[147],"potential":[151],"approach":[154],"case":[157],"study":[158,165],"stereo":[161],"matching":[162],"algorithm.":[163],"demonstrates":[166],"that":[167],"kernels":[173],"maintain":[175],"parity":[177],"with":[178],"implementation,":[181],"while":[182],"consuming":[183],"over":[184],"16X":[185],"less":[186],"energy":[187],"than":[188],"GPU.":[190],"Based":[191],"our":[193],"manual":[194],"procedure,":[195],"identify":[197],"key":[199],"fully":[202],"automating":[203],"programs.":[209]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2016,"cited_by_count":8},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
