{"id":"https://openalex.org/W4320067872","doi":"https://doi.org/10.1145/3545008.3545091","title":"From RTL to CUDA: A GPU Acceleration Flow for RTL Simulation with Batch Stimulus","display_name":"From RTL to CUDA: A GPU Acceleration Flow for RTL Simulation with Batch Stimulus","publication_year":2022,"publication_date":"2022-08-29","ids":{"openalex":"https://openalex.org/W4320067872","doi":"https://doi.org/10.1145/3545008.3545091"},"language":"en","primary_location":{"id":"doi:10.1145/3545008.3545091","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545091","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545091","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545091","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048830943","display_name":"Dian-Lun Lin","orcid":"https://orcid.org/0000-0003-3075-7437"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dian-Lun Lin","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029928585","display_name":"Haoxing Ren","orcid":"https://orcid.org/0000-0003-1028-3860"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoxing Ren","raw_affiliation_strings":["Nvidia Research, United States of America"],"affiliations":[{"raw_affiliation_string":"Nvidia Research, United States of America","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100612105","display_name":"Yanqing Zhang","orcid":"https://orcid.org/0000-0003-2349-1925"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanqing Zhang","raw_affiliation_strings":["Nvidia Research, United States of America"],"affiliations":[{"raw_affiliation_string":"Nvidia Research, United States of America","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010156116","display_name":"Brucek Khailany","orcid":"https://orcid.org/0000-0002-7584-3489"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brucek Khailany","raw_affiliation_strings":["Nvidia Research, United States of America"],"affiliations":[{"raw_affiliation_string":"Nvidia Research, United States of America","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088685794","display_name":"Tsung\u2010Wei Huang","orcid":"https://orcid.org/0000-0001-9768-3378"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tsung-Wei Huang","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048830943"],"corresponding_institution_ids":["https://openalex.org/I223532165"],"apc_list":null,"apc_paid":null,"fwci":9.0846,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.9867945,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8084560632705688},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6974101066589355},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6666762828826904},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.48530715703964233},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.45556971430778503},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.42671650648117065},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4225006699562073},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.35788899660110474},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.2094573676586151},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10987567901611328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8084560632705688},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6974101066589355},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6666762828826904},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.48530715703964233},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.45556971430778503},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.42671650648117065},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4225006699562073},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.35788899660110474},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.2094573676586151},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10987567901611328}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3545008.3545091","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545091","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545091","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3545008.3545091","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3545008.3545091","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3545008.3545091","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.6100000143051147}],"awards":[{"id":"https://openalex.org/G7737325972","display_name":null,"funder_award_id":"CCF-2225233;CCF-2144523","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4320067872.pdf"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W581518716","https://openalex.org/W2054261835","https://openalex.org/W2059896017","https://openalex.org/W2095266728","https://openalex.org/W2111979321","https://openalex.org/W2397503904","https://openalex.org/W2963366311","https://openalex.org/W3092265192","https://openalex.org/W3111098492","https://openalex.org/W3114262462","https://openalex.org/W3188917597","https://openalex.org/W3198396679","https://openalex.org/W3206719683","https://openalex.org/W4205834599","https://openalex.org/W4252769808","https://openalex.org/W4283390926","https://openalex.org/W4293025123","https://openalex.org/W4293261783","https://openalex.org/W4388320658"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825"],"abstract_inverted_index":{"High-throughput":[0],"RTL":[1,16,66,75,86],"simulation":[2,17,28,67],"is":[3],"critical":[4],"for":[5,99],"verifying":[6],"today\u2019s":[7],"highly":[8],"complex":[9],"SoCs.":[10],"Recent":[11],"research":[12],"has":[13],"explored":[14],"accelerating":[15],"by":[18],"leveraging":[19],"event-driven":[20],"approaches":[21],"or":[22,55],"partitioning":[23],"heuristics":[24],"to":[25,134],"speed":[26],"up":[27],"on":[29,106,120],"a":[30,64,82,107,121,127],"single":[31,122],"stimulus.":[32,71,90],"To":[33],"further":[34],"accelerate":[35],"throughput":[36],"performance,":[37],"industry-quality":[38],"functional":[39],"verification":[40],"signoff":[41],"must":[42],"explore":[43],"running":[44,119],"multiple":[45,89],"stimulus":[46],"(i.e.,":[47],"batch":[48,70],"stimulus)":[49],"simultaneously,":[50],"either":[51],"with":[52,69,112],"directed":[53],"tests":[54],"random":[56],"inputs.":[57],"In":[58],"this":[59],"paper,":[60],"we":[61,115],"propose":[62],"RTLFlow,":[63],"GPU-accelerated":[65],"flow":[68],"RTLflow":[72,118],"first":[73],"transpiles":[74],"into":[76],"CUDA":[77,94],"kernels":[78],"that":[79,117],"each":[80],"simulates":[81],"partition":[83],"of":[84],"the":[85],"simultaneously":[87],"across":[88],"It":[91],"also":[92],"leverages":[93],"Graph":[95],"and":[96],"pipeline":[97],"scheduling":[98],"efficient":[100],"runtime":[101,130],"execution.":[102],"Measuring":[103],"experimental":[104],"results":[105],"large":[108],"industrial":[109],"design":[110],"(NVDLA)":[111],"65536":[113],"stimulus,":[114],"show":[116],"A6000":[123],"GPU":[124],"can":[125],"achieve":[126],"40":[128],"\u00d7":[129],"speed-up":[131],"when":[132],"compared":[133],"an":[135],"80-thread":[136],"multi-core":[137],"CPU":[138],"baseline.":[139]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
