{"id":"https://openalex.org/W7128725136","doi":"https://doi.org/10.48550/arxiv.2602.11000","title":"Fine-Tuning GPT-5 for GPU Kernel Generation","display_name":"Fine-Tuning GPT-5 for GPU Kernel Generation","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128725136","doi":"https://doi.org/10.48550/arxiv.2602.11000"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11000","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068820827","display_name":"Ali Soltani Tehrani","orcid":"https://orcid.org/0000-0001-5673-7085"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tehrani, Ali","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125688476","display_name":"Yahya Emara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emara, Yahya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125753951","display_name":"Essam Wissam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wissam, Essam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125697149","display_name":"Wojciech Paluch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paluch, Wojciech","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125753033","display_name":"Waleed Atallah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atallah, Waleed","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125698308","display_name":"\u0141ukasz Dudziak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dudziak, \u0141ukasz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125730835","display_name":"Mohamed S. Abdelfattah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdelfattah, Mohamed S.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5068820827"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.19429999589920044,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.19429999589920044,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.12250000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6643000245094299},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6553000211715698},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6388000249862671},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5274999737739563},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5073000192642212},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4896000027656555},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3626999855041504},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.33059999346733093}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8452000021934509},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6643000245094299},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6553000211715698},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6388000249862671},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5274999737739563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5123000144958496},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5073000192642212},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4959000051021576},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4896000027656555},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.37540000677108765},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3626999855041504},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.3192000091075897},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3100000023841858},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2775999903678894},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2615000009536743},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.2581000030040741},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2533999979496002}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11000","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11000","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11000","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11000","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.5182126760482788,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Developing":[0],"efficient":[1],"GPU":[2,47],"kernels":[3],"is":[4,186,238],"essential":[5],"for":[6,25,80,117,131,246],"scaling":[7],"modern":[8],"AI":[9],"systems,":[10],"yet":[11],"it":[12,185],"remains":[13],"a":[14,77,90,107,181,210],"complex":[15],"task":[16],"due":[17],"to":[18,98,147,162,168,188,191],"intricate":[19],"hardware":[20,69],"architectures":[21],"and":[22,65,92,106,115,124,152],"the":[23,52,136,154,201],"need":[24],"specialized":[26,231],"optimization":[27],"expertise.":[28],"Although":[29],"Large":[30],"Language":[31],"Models":[32],"(LLMs)":[33],"demonstrate":[34],"strong":[35],"capabilities":[36,228],"in":[37,46,195,229],"general":[38],"sequential":[39],"code":[40,48,133],"generation,":[41],"they":[42],"face":[43],"significant":[44],"challenges":[45],"generation":[49],"because":[50],"of":[51,54,103,121,156,193,207,214],"scarcity":[53],"high-quality":[55],"labeled":[56],"training":[57,104],"data,":[58],"compiler":[59,204],"biases":[60],"when":[61],"generating":[62],"synthetic":[63],"solutions,":[64],"limited":[66,239],"generalization":[67],"across":[68],"generations.":[70],"This":[71],"precludes":[72],"supervised":[73,236],"fine-tuning":[74,129],"(SFT)":[75],"as":[76],"scalable":[78],"methodology":[79],"improving":[81],"current":[82],"LLMs.":[83],"In":[84,135],"contrast,":[85],"reinforcement":[86,118,223],"learning":[87,119,224,237],"(RL)":[88],"offers":[89],"data-efficient":[91],"adaptive":[93],"alternative":[94],"but":[95],"requires":[96],"access":[97],"relevant":[99],"tools,":[100],"careful":[101],"selection":[102],"problems,":[105],"robust":[108],"evaluation":[109],"environment.":[110],"We":[111],"present":[112],"Makora's":[113],"environment":[114],"tools":[116],"finetuning":[120],"frontier":[122],"models":[123,175],"report":[125],"our":[126,139],"results":[127],"from":[128,145,160],"GPT-5":[130],"Triton":[132],"generation.":[134],"single-attempt":[137],"setting,":[138],"fine-tuned":[140],"model":[141],"improves":[142],"kernel":[143],"correctness":[144],"43.7%":[146],"77.0%":[148],"(+33.3":[149],"percentage":[150,165],"points)":[151,166],"increases":[153],"fraction":[155],"problems":[157,194,208],"outperforming":[158,200],"TorchInductor":[159,203],"14.8%":[161],"21.8%":[163],"(+7":[164],"compared":[167],"baseline":[169],"GPT-5,":[170],"while":[171],"exceeding":[172],"prior":[173],"state-of-the-art":[174],"on":[176,205],"KernelBench.":[177],"When":[178],"integrated":[179],"into":[180],"full":[182],"coding":[183],"agent,":[184],"able":[187],"solve":[189],"up":[190],"97.4%":[192],"an":[196],"expanded":[197],"KernelBench":[198],"suite,":[199],"PyTorch":[202],"72.9%":[206],"with":[209,222],"geometric":[211],"mean":[212],"speedup":[213],"2.12x.":[215],"Our":[216],"work":[217],"demonstrates":[218],"that":[219],"targeted":[220],"post-training":[221],"can":[225],"unlock":[226],"LLM":[227],"highly":[230],"technical":[232],"domains":[233],"where":[234],"traditional":[235],"by":[240],"data":[241],"availability,":[242],"opening":[243],"new":[244],"pathways":[245],"AI-assisted":[247],"accelerator":[248],"programming.":[249]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-13T00:00:00"}
