{"id":"https://openalex.org/W7160556403","doi":"https://doi.org/10.48550/arxiv.2605.04956","title":"KernelBenchX: A Comprehensive Benchmark for Evaluating LLM-Generated GPU Kernels","display_name":"KernelBenchX: A Comprehensive Benchmark for Evaluating LLM-Generated GPU Kernels","publication_year":2026,"publication_date":"2026-05-06","ids":{"openalex":"https://openalex.org/W7160556403","doi":"https://doi.org/10.48550/arxiv.2605.04956"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.04956","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04956","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.04956","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135620965","display_name":"Han Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135606713","display_name":"Jintao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jintao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135634583","display_name":"Kai Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135620341","display_name":"Haoxu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Haoxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135635060","display_name":"Jianfei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jianfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135626948","display_name":"Jun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5356000065803528,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5356000065803528,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.040699999779462814,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.03929999843239784,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.9430999755859375},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.8560000061988831},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6822999715805054},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6363999843597412},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5088000297546387},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.48899999260902405},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.45669999718666077}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.9430999755859375},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.8560000061988831},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7476000189781189},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6822999715805054},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6363999843597412},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.548799991607666},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5088000297546387},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.48899999260902405},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45750001072883606},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.45669999718666077},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.37040001153945923},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.34769999980926514},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3441999852657318},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2687000036239624},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.04956","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04956","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.04956","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.04956","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM-based":[0],"Triton":[1],"kernel":[2],"generation":[3],"has":[4],"attracted":[5],"significant":[6],"interest,":[7],"yet":[8],"a":[9,26],"fundamental":[10],"empirical":[11],"question":[12,32],"remains":[13,165],"unanswered:":[14],"where":[15],"does":[16,142],"this":[17,31],"capability":[18],"break":[19],"down,":[20],"and":[21,38,84,157,201],"why?":[22],"We":[23],"present":[24],"KernelBenchX,":[25],"benchmark":[27],"designed":[28],"to":[29,116,124],"answer":[30],"through":[33],"category-aware":[34],"evaluation":[35],"of":[36,50,86,147,177],"correctness":[37,62,76,141],"hardware":[39,203],"efficiency":[40,204],"across":[41,90],"176":[42],"tasks":[43,88,96],"in":[44,74,138],"15":[45],"categories.":[46],"Our":[47],"systematic":[48,175],"comparison":[49],"five":[51,92],"representative":[52],"methods":[53,93],"yields":[54],"three":[55,70],"main":[56],"findings.":[57],"First,":[58],"task":[59],"structure":[60],"determines":[61],"more":[63,72],"than":[64,77,152,182],"method":[65,78],"design.":[66],"Category":[67],"explains":[68],"nearly":[69],"times":[71],"variance":[73,160],"semantic":[75],"(9.4%":[79],"vs":[80,135],"3.3%":[81],"explained":[82],"deviance),":[83],"72%":[85],"Fusion":[87],"fail":[89],"all":[91],"while":[94,118],"Math":[95],"are":[97,150],"solved":[98],"consistently.":[99],"Second,":[100],"iterative":[101],"refinement":[102],"improves":[103],"correctness,":[104],"but":[105],"not":[106,143],"performance.":[107],"Across":[108],"GEAK":[109],"iterations,":[110],"compile":[111],"rate":[112],"rises":[113],"from":[114,122],"52.3%":[115],"68.8%":[117],"average":[119],"speedup":[120,137,159],"declines":[121],"$1.58\\times$":[123,136],"$1.44\\times$;":[125],"newly":[126],"rescued":[127],"kernels":[128,149],"consistently":[129],"underperform":[130],"persistently":[131],"correct":[132,148],"ones":[133],"($1.16\\times$":[134],"round~0$\\to$1).":[139],"Third,":[140],"imply":[144],"efficiency.":[145],"46.6%":[146],"slower":[151],"the":[153],"PyTorch":[154],"eager":[155],"baseline,":[156],"cross-hardware":[158],"reaches":[161],"$21.4\\times$.":[162],"Besides,":[163],"quantization":[164],"completely":[166],"unsolved":[167],"(0/30":[168],"successes)":[169],"despite":[170],"non-trivial":[171],"compilation":[172],"rates,":[173],"revealing":[174],"misunderstanding":[176],"numerical":[178,199],"computation":[179],"contracts":[180],"rather":[181],"surface-level":[183],"syntax":[184],"errors.":[185],"These":[186],"findings":[187],"suggest":[188],"that":[189],"future":[190],"progress":[191],"depends":[192],"on":[193],"handling":[194],"global":[195],"coordination,":[196],"explicitly":[197],"modeling":[198],"precision,":[200],"incorporating":[202],"into":[205],"generation.":[206],"The":[207],"code":[208],"is":[209],"available":[210],"at":[211],"https://github.com/BonnieW05/KernelBenchX":[212]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-08T00:00:00"}
