{"id":"https://openalex.org/W4416429599","doi":"https://doi.org/10.1109/iccad66269.2025.11240644","title":"LLM4Verilog: Building Large-Scale, High-Quality Data Infrastructure for Verilog Code Generation via Community Efforts","display_name":"LLM4Verilog: Building Large-Scale, High-Quality Data Infrastructure for Verilog Code Generation via Community Efforts","publication_year":2025,"publication_date":"2025-10-26","ids":{"openalex":"https://openalex.org/W4416429599","doi":"https://doi.org/10.1109/iccad66269.2025.11240644"},"language":null,"primary_location":{"id":"doi:10.1109/iccad66269.2025.11240644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034614246","display_name":"Zhongzhi Yu","orcid":"https://orcid.org/0000-0002-9981-4981"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhongzhi Yu","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065172226","display_name":"Chaojian Li","orcid":"https://orcid.org/0000-0003-4030-9777"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chaojian Li","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101912939","display_name":"Yongan Zhang","orcid":"https://orcid.org/0000-0001-7919-049X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongan Zhang","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100681148","display_name":"Mingjie Liu","orcid":"https://orcid.org/0000-0002-7336-8282"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingjie Liu","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050270997","display_name":"Nathaniel Pinckney","orcid":"https://orcid.org/0000-0001-6159-8964"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathaniel Pinckney","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059937508","display_name":"Wenfei Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenfei Zhou","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013528664","display_name":"Rongjian Liang","orcid":"https://orcid.org/0000-0001-8626-2359"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rongjian Liang","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100642435","display_name":"Haoyu Yang","orcid":"https://orcid.org/0000-0002-4709-0061"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoyu Yang","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029928585","display_name":"Haoxing Ren","orcid":"https://orcid.org/0000-0003-1028-3860"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoxing Ren","raw_affiliation_strings":["Nvidia Corporation"],"affiliations":[{"raw_affiliation_string":"Nvidia Corporation","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019582323","display_name":"Yingyan Lin","orcid":"https://orcid.org/0000-0001-5946-203X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yingyan Celine Lin","raw_affiliation_strings":["Georgia Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5034614246"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19974922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2797999978065491,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2797999978065491,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11720000207424164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.07159999758005142,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5957000255584717},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.578499972820282},{"id":"https://openalex.org/keywords/verilog","display_name":"Verilog","score":0.5230000019073486},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.48030000925064087},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.45210000872612},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3617999851703644}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7893999814987183},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5957000255584717},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.578499972820282},{"id":"https://openalex.org/C2779030575","wikidata":"https://www.wikidata.org/wiki/Q827773","display_name":"Verilog","level":3,"score":0.5230000019073486},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.48030000925064087},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.45210000872612},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37689998745918274},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36090001463890076},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.36059999465942383},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.35929998755455017},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2770000100135803},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccad66269.2025.11240644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2132069633","https://openalex.org/W2946749442","https://openalex.org/W3098605233","https://openalex.org/W3177765786","https://openalex.org/W4206410393","https://openalex.org/W4252326246","https://openalex.org/W4385562549","https://openalex.org/W4389166691","https://openalex.org/W4389166737","https://openalex.org/W4391681217","https://openalex.org/W4393145520","https://openalex.org/W4402670687","https://openalex.org/W4403023622","https://openalex.org/W4403024064","https://openalex.org/W4404783735","https://openalex.org/W4409285487","https://openalex.org/W4409369309","https://openalex.org/W4412887834","https://openalex.org/W4413145157","https://openalex.org/W4413822481","https://openalex.org/W4413964757","https://openalex.org/W4415003690"],"related_works":[],"abstract_inverted_index":{"Despite":[0],"recent":[1],"advancements":[2],"in":[3,29,101,149],"code":[4,13,192],"generation":[5,193],"with":[6,81,161],"large":[7],"language":[8],"models":[9,159],"(LLMs),":[10],"generating":[11],"hardware":[12,31],"such":[14],"as":[15,184],"Verilog":[16,71,191],"remains":[17],"a":[18,67,76,82,102,123,185],"significant":[19],"challenge":[20],"due":[21],"to":[22,87,117,158,206],"the":[23,30,131],"scarcity":[24],"of":[25,133],"large-scale,":[26,68],"high-quality":[27],"datasets":[28,135],"domain.":[32],"Existing":[33],"approaches,":[34],"including":[35],"scraping":[36],"open-source":[37,70],"repositories":[38],"and":[39,51,97,121,146,154,201],"relying":[40,171],"on":[41,141,152,172],"manually":[42],"curated":[43],"datasets,":[44],"often":[45],"suffer":[46],"from":[47],"limited":[48],"diversity,":[49],"quality,":[50],"scalability.":[52],"To":[53],"address":[54],"these":[55,134,166],"limitations,":[56],"we":[57],"introduce":[58],"LLM4Verilog,":[59],"an":[60,111],"exploratory,":[61],"collaborative":[62],"initiative":[63,74,200],"aimed":[64],"at":[65],"constructing":[66],"high-quality,":[69,124],"dataset.":[72],"Our":[73],"integrates":[75],"community-driven":[77],"data":[78,84,176],"collection":[79],"pipeline":[80],"two-stage":[83],"filtering":[85,120],"technique":[86],"ensure":[88],"high":[89],"dataset":[90,104,126],"quality.":[91],"The":[92,107],"first":[93],"stage":[94,109],"removes":[95],"duplicates":[96],"low-quality":[98],"samples,":[99],"resulting":[100,202],"large-scale":[103],"called":[105,127],"LLM4Verilog-complete.":[106],"second":[108],"applies":[110],"LLM-driven":[112],"quality":[113],"scoring":[114],"method,":[115],"VeriScore,":[116],"perform":[118],"fine-grained":[119],"produce":[122],"ready-to-use":[125],"LLM4Verilog-filtered.":[128],"We":[129],"evaluate":[130],"effectiveness":[132],"through":[136],"fine-tuning":[137,174],"three":[138],"different":[139],"LLMs":[140],"our":[142,180,199],"dataset,":[143,203],"achieving":[144],"6.6%~11.2%":[145],"5.3%~13.2%":[147],"improvements":[148,167],"pass@1":[150],"scores":[151],"VerilogEval-human":[153],"VerilogEval-Machine,":[155],"respectively,":[156],"compared":[157],"fine-tuned":[160],"prior":[162],"state-of-the-art":[163],"datasets.":[164],"Notably,":[165],"are":[168],"achieved":[169],"without":[170],"complex":[173],"or":[175],"augmentation":[177],"techniques,":[178],"highlighting":[179],"dataset\u2019s":[181],"strong":[182],"potential":[183],"foundational":[186],"resource":[187],"for":[188],"enhancing":[189],"LLMs\u2019":[190],"capabilities.":[194],"For":[195],"more":[196],"information":[197],"about":[198],"please":[204],"refer":[205],"https://nvlabs.github.io/LLM4HWDesign/.":[207]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-20T00:00:00"}
