{"id":"https://openalex.org/W4414198133","doi":"https://doi.org/10.1109/dac63849.2025.11132713","title":"GEM: GPU-Accelerated Emulator-Inspired RTL Simulation","display_name":"GEM: GPU-Accelerated Emulator-Inspired RTL Simulation","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198133","doi":"https://doi.org/10.1109/dac63849.2025.11132713"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132713","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039573083","display_name":"Zizheng Guo","orcid":"https://orcid.org/0000-0002-0724-5356"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zizheng Guo","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059148667","display_name":"Yanqing Zhang","orcid":"https://orcid.org/0000-0002-7074-7957"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanqing Zhang","raw_affiliation_strings":["NVIDIA Corporation"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101415852","display_name":"Runsheng Wang","orcid":"https://orcid.org/0000-0002-8016-5934"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runsheng Wang","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000933188","display_name":"Yibo Lin","orcid":"https://orcid.org/0000-0002-0977-2774"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yibo Lin","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029928585","display_name":"Haoxing Ren","orcid":"https://orcid.org/0000-0003-1028-3860"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoxing Ren","raw_affiliation_strings":["NVIDIA Corporation"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039573083"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":4.6008,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95172708,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9495000243186951,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9495000243186951,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.9229999780654907,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5896000266075134},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5278000235557556},{"id":"https://openalex.org/keywords/design-flow","display_name":"Design flow","score":0.507099986076355},{"id":"https://openalex.org/keywords/logic-simulation","display_name":"Logic simulation","score":0.5},{"id":"https://openalex.org/keywords/register-transfer-level","display_name":"Register-transfer level","score":0.4828999936580658},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.4683000147342682},{"id":"https://openalex.org/keywords/logic-synthesis","display_name":"Logic synthesis","score":0.4300000071525574},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.41519999504089355},{"id":"https://openalex.org/keywords/circuit-design","display_name":"Circuit design","score":0.4124000072479248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.771399974822998},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.6316999793052673},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5896000266075134},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C37135326","wikidata":"https://www.wikidata.org/wiki/Q931942","display_name":"Design flow","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C64859876","wikidata":"https://www.wikidata.org/wiki/Q173673","display_name":"Logic simulation","level":3,"score":0.5},{"id":"https://openalex.org/C34854456","wikidata":"https://www.wikidata.org/wiki/Q1484552","display_name":"Register-transfer level","level":4,"score":0.4828999936580658},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4537000060081482},{"id":"https://openalex.org/C157922185","wikidata":"https://www.wikidata.org/wiki/Q173198","display_name":"Logic synthesis","level":3,"score":0.4300000071525574},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.41519999504089355},{"id":"https://openalex.org/C190560348","wikidata":"https://www.wikidata.org/wiki/Q3245116","display_name":"Circuit design","level":2,"score":0.4124000072479248},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.38769999146461487},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36970001459121704},{"id":"https://openalex.org/C74524168","wikidata":"https://www.wikidata.org/wiki/Q1074539","display_name":"Integrated circuit design","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C64260653","wikidata":"https://www.wikidata.org/wiki/Q1194864","display_name":"Electronic design automation","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C131017901","wikidata":"https://www.wikidata.org/wiki/Q170451","display_name":"Logic gate","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C87695204","wikidata":"https://www.wikidata.org/wiki/Q629971","display_name":"Asynchronous circuit","level":5,"score":0.29679998755455017},{"id":"https://openalex.org/C188817802","wikidata":"https://www.wikidata.org/wiki/Q13426855","display_name":"Physical design","level":3,"score":0.2944999933242798},{"id":"https://openalex.org/C206274596","wikidata":"https://www.wikidata.org/wiki/Q1063837","display_name":"Programmable logic device","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C28449271","wikidata":"https://www.wikidata.org/wiki/Q6667469","display_name":"Logic optimization","level":4,"score":0.2800000011920929},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C530198007","wikidata":"https://www.wikidata.org/wiki/Q80831","display_name":"Integrated circuit","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2639999985694885},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C36941000","wikidata":"https://www.wikidata.org/wiki/Q209455","display_name":"VHDL","level":3,"score":0.2547000050544739},{"id":"https://openalex.org/C142962650","wikidata":"https://www.wikidata.org/wiki/Q240838","display_name":"Reconfigurable computing","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C162454741","wikidata":"https://www.wikidata.org/wiki/Q173359","display_name":"Logic family","level":4,"score":0.25209999084472656},{"id":"https://openalex.org/C62460635","wikidata":"https://www.wikidata.org/wiki/Q5508853","display_name":"Functional verification","level":3,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132713","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2059896017","https://openalex.org/W2095266728","https://openalex.org/W2789489838","https://openalex.org/W3027968530","https://openalex.org/W3092265192","https://openalex.org/W3146200412","https://openalex.org/W3146952922","https://openalex.org/W3213528054","https://openalex.org/W4248808211","https://openalex.org/W4253751283","https://openalex.org/W4293261783","https://openalex.org/W4320067872","https://openalex.org/W4327911467","https://openalex.org/W4386763614","https://openalex.org/W4389491860","https://openalex.org/W4409282490"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"present":[4],"a":[5,76,93,103,150],"GPU-accelerated":[6],"RTL":[7,18,45,126,143],"simulator":[8],"addressing":[9],"critical":[10],"challenges":[11],"in":[12,102],"high-speed":[13,142],"circuit":[14,53,97,155],"verification.":[15,156],"Traditional":[16],"CPU-based":[17],"simulators":[19],"struggle":[20],"with":[21,57,145],"scalability":[22],"and":[23,25,34,119,148],"performance,":[24],"while":[26],"FPGA-based":[27],"emulators":[28],"offer":[29],"acceleration,":[30],"they":[31],"are":[32],"costly":[33],"less":[35],"accessible.":[36],"Previous":[37],"GPU-based":[38],"attempts":[39],"have":[40],"failed":[41],"to":[42,48,99,106,132],"speed":[43],"up":[44,131],"simulation":[46,144],"due":[47],"the":[49,58,69,100,107,137],"heterogeneous":[50],"nature":[51],"of":[52,65,71,115],"partitions,":[54],"which":[55],"conflicts":[56],"SIMT":[59],"(Single":[60],"Instruction,":[61],"Multiple":[62],"Thread)":[63],"paradigm":[64],"GPUs.":[66],"Inspired":[67],"by":[68],"design":[70,92],"emulators,":[72],"our":[73],"approach":[74],"introduces":[75],"novel":[77],"virtual":[78],"Very":[79],"Long":[80],"Instruction":[81],"Word":[82],"(VLIW)":[83],"architecture,":[84],"designed":[85],"for":[86,125,153],"efficient":[87],"CUDA":[88],"execution.":[89],"We":[90],"also":[91],"flow":[94],"that":[95],"maps":[96],"logic":[98],"architecture":[101,112],"process":[104],"analogous":[105],"FPGA":[108],"CAD":[109],"flow.":[110],"This":[111],"mitigates":[113],"issues":[114],"irregular":[116],"memory":[117],"access":[118],"thread":[120],"divergence,":[121],"unlocking":[122],"GPU":[123],"potential":[124],"simulation.":[127],"Our":[128],"solution":[129],"achieves":[130],"$64":[133],"\\times$":[134],"speed-up":[135],"over":[136],"best":[138],"CPU":[139],"simulators,":[140],"democratizing":[141],"accessible":[146],"hardware":[147],"establishing":[149],"new":[151],"frontier":[152],"GPUaccelerated":[154]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
