{"id":"https://openalex.org/W4415974422","doi":"https://doi.org/10.1016/j.procs.2025.09.460","title":"Automated Transformation of OpenMP to CUDA Kernels Using AI Models","display_name":"Automated Transformation of OpenMP to CUDA Kernels Using AI Models","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4415974422","doi":"https://doi.org/10.1016/j.procs.2025.09.460"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2025.09.460","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2025.09.460","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2025.09.460","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078972354","display_name":"Mateusz Gru\u017cewski","orcid":"https://orcid.org/0000-0002-9419-2749"},"institutions":[{"id":"https://openalex.org/I155313962","display_name":"West Pomeranian University of Technology","ror":"https://ror.org/0596m7f19","country_code":"PL","type":"education","lineage":["https://openalex.org/I155313962"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Mateusz Gruzewski","raw_affiliation_strings":["West Pomeranian University of Technology, Zolnierska 49, Szczecin 71210, Poland"],"affiliations":[{"raw_affiliation_string":"West Pomeranian University of Technology, Zolnierska 49, Szczecin 71210, Poland","institution_ids":["https://openalex.org/I155313962"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5078972354"],"corresponding_institution_ids":["https://openalex.org/I155313962"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38075003,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"270","issue":null,"first_page":"3352","last_page":"3361"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9603000283241272,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9603000283241272,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.00430000014603138,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.9067999720573425},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7955999970436096},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7441999912261963},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6779999732971191},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.5834000110626221},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.48010000586509705},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.46239998936653137},{"id":"https://openalex.org/keywords/loop-unrolling","display_name":"Loop unrolling","score":0.4106999933719635},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.40119999647140503}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9129999876022339},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.9067999720573425},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7955999970436096},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7441999912261963},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7193999886512756},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6779999732971191},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.5834000110626221},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.48010000586509705},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.46239998936653137},{"id":"https://openalex.org/C76970557","wikidata":"https://www.wikidata.org/wiki/Q1869750","display_name":"Loop unrolling","level":3,"score":0.4106999933719635},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.3594000041484833},{"id":"https://openalex.org/C2778361913","wikidata":"https://www.wikidata.org/wiki/Q7248437","display_name":"Program transformation","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.34599998593330383},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.3310000002384186},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C42383842","wikidata":"https://www.wikidata.org/wiki/Q193076","display_name":"Functional programming","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2784000039100647},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C195701839","wikidata":"https://www.wikidata.org/wiki/Q4008398","display_name":"Partial evaluation","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.25769999623298645},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2025.09.460","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2025.09.460","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2025.09.460","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2025.09.460","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W68373795","https://openalex.org/W1558370006","https://openalex.org/W1987304299","https://openalex.org/W2023446554","https://openalex.org/W2080592089","https://openalex.org/W2170634604","https://openalex.org/W3028670792","https://openalex.org/W3170092793","https://openalex.org/W4221061044","https://openalex.org/W4382490953","https://openalex.org/W4386141747","https://openalex.org/W4386269029","https://openalex.org/W4391169057","https://openalex.org/W4396790347","https://openalex.org/W4403673926"],"related_works":[],"abstract_inverted_index":{"The":[0,64,170,182],"increasing":[1],"demand":[2],"for":[3,59],"computational":[4],"efficiency":[5],"in":[6,91,144,157],"high-performance":[7],"computing":[8],"(HPC)":[9],"has":[10],"driven":[11],"research":[12],"into":[13,33],"automating":[14],"the":[15,57,81,92,103,115,145,150,167,174,188],"transformation":[16],"of":[17,68,76,139,178],"parallel":[18,31,129,209],"programming":[19],"paradigms.":[20],"This":[21],"paper":[22],"investigates":[23],"an":[24],"AI-driven":[25],"approach":[26,172],"to":[27,101,106,108,203],"translating":[28],"OpenMP-based":[29],"CPU":[30],"programs":[32],"CUDA-based":[34],"GPU":[35],"programs.":[36],"Using":[37],"omniCUDA,":[38],"a":[39,73],"custom":[40],"fine-tuned":[41],"large":[42],"language":[43],"model":[44,116,151,183],"(LLM),":[45],"functional":[46,126],"CUDA":[47,122,164],"kernels":[48,88,165],"can":[49],"be":[50],"generated":[51],"directly":[52],"from":[53,80,166],"OpenMP":[54,154],"code,":[55,123],"bypassing":[56],"need":[58],"traditional":[60],"compiler":[61],"optimization":[62],"techniques.":[63],"training":[65,93,146,189],"dataset":[66],"consists":[67],"synthetic":[69],"OpenMP-to-CUDA":[70,180],"pairs":[71],"and":[72,120,176,191,200],"selected":[74],"subset":[75],"manually":[77,162],"optimized":[78,163],"algorithms":[79],"PolyBench":[82,168],"suite.":[83,169],"Performance":[84,132],"was":[85],"evaluated":[86],"on":[87,134,195],"not":[89,142],"included":[90,143],"set,":[94,190],"with":[95],"only":[96],"partial":[97],"overlap,":[98],"allowing":[99],"me":[100],"assess":[102],"model\u2019s":[104],"ability":[105],"generalize":[107],"unseen":[109],"algorithms.":[110],"Experimental":[111],"results":[112],"confirm":[113],"that":[114,149],"produces":[117],"syntactically":[118],"correct":[119],"compilable":[121],"successfully":[124],"replicating":[125],"behavior":[127],"across":[128,207],"loop":[130],"structures.":[131],"evaluation":[133],"four":[135],"benchmark":[136],"algorithms,":[137],"three":[138],"which":[140],"were":[141],"dataset,":[147],"shows":[148],"consistently":[152],"outperforms":[153],"implementations":[155],"and,":[156],"some":[158],"cases,":[159],"surpasses":[160],"even":[161],"presented":[171],"demonstrates":[173],"feasibility":[175],"competitiveness":[177],"AI-assisted":[179],"transformation.":[181],"exhibits":[184],"generalization":[185],"capabilities":[186],"beyond":[187],"ongoing":[192],"work":[193],"focuses":[194],"refining":[196],"memory":[197],"access":[198],"strategies":[199],"kernel":[201],"configurations":[202],"further":[204],"enhance":[205],"performance":[206],"diverse":[208],"workloads.":[210]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-06T00:00:00"}
