{"id":"https://openalex.org/W4392265915","doi":"https://doi.org/10.1109/cgo57630.2024.10444828","title":"Retargeting and Respecializing GPU Workloads for Performance Portability","display_name":"Retargeting and Respecializing GPU Workloads for Performance Portability","publication_year":2024,"publication_date":"2024-02-28","ids":{"openalex":"https://openalex.org/W4392265915","doi":"https://doi.org/10.1109/cgo57630.2024.10444828"},"language":"en","primary_location":{"id":"doi:10.1109/cgo57630.2024.10444828","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo57630.2024.10444828","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080497888","display_name":"Ivan R. Ivanov","orcid":"https://orcid.org/0000-0003-0356-3768"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Ivan R. Ivanov","raw_affiliation_strings":["Tokyo Institute of Technology, RIKEN R-CCS,Kobe,Japan","Tokyo Institute of Technology, RIKEN R-CCS, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, RIKEN R-CCS,Kobe,Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"Tokyo Institute of Technology, RIKEN R-CCS, Kobe, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069808908","display_name":"\u041e\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440 \u0417\u0456\u043d\u0435\u043d\u043a\u043e","orcid":"https://orcid.org/0000-0001-5228-9940"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oleksandr Zinenko","raw_affiliation_strings":["Google DeepMind,Paris,France","Google DeepMind, Paris, France"],"affiliations":[{"raw_affiliation_string":"Google DeepMind,Paris,France","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google DeepMind, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039069961","display_name":"Jens Domke","orcid":"https://orcid.org/0000-0002-5343-414X"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jens Domke","raw_affiliation_strings":["RIKEN R-CCS,Kobe,Japan","RIKEN R-CCS, Kobe, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN R-CCS,Kobe,Japan","institution_ids":["https://openalex.org/I4210129730"]},{"raw_affiliation_string":"RIKEN R-CCS, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011254074","display_name":"Toshio Endo","orcid":"https://orcid.org/0000-0001-7297-6211"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshio Endo","raw_affiliation_strings":["Tokyo Institute of Technology,Tokyo,Japan","Tokyo Institute of Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology,Tokyo,Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071127035","display_name":"William S. Moses","orcid":"https://orcid.org/0000-0003-2627-0642"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"William S. Moses","raw_affiliation_strings":["University of Illinois Urhana-Champaign, Google DeepMind,Illinois,United States","University of Illinois Urhana-Champaign, Google DeepMind, Illinois, United States"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urhana-Champaign, Google DeepMind,Illinois,United States","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"University of Illinois Urhana-Champaign, Google DeepMind, Illinois, United States","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080497888"],"corresponding_institution_ids":["https://openalex.org/I114531698","https://openalex.org/I4210129730"],"apc_list":null,"apc_paid":null,"fwci":3.6386,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.93757181,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"119","last_page":"132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8970216512680054},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7915269136428833},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6706636548042297},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.658798098564148},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5795270204544067},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5233579874038696},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.49898338317871094},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44946932792663574},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.4235239326953888},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30715909600257874}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8970216512680054},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7915269136428833},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6706636548042297},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.658798098564148},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5795270204544067},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5233579874038696},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.49898338317871094},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44946932792663574},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.4235239326953888},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30715909600257874},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cgo57630.2024.10444828","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo57630.2024.10444828","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.6899999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W73145541","https://openalex.org/W1989562524","https://openalex.org/W1993405055","https://openalex.org/W2055312318","https://openalex.org/W2067479799","https://openalex.org/W2077143534","https://openalex.org/W2078794610","https://openalex.org/W2080592089","https://openalex.org/W2107911628","https://openalex.org/W2534888058","https://openalex.org/W2743508538","https://openalex.org/W2804032941","https://openalex.org/W2808709390","https://openalex.org/W2896831700","https://openalex.org/W2899971035","https://openalex.org/W2954698171","https://openalex.org/W2979365412","https://openalex.org/W2996807164","https://openalex.org/W3122286897","https://openalex.org/W3173740437","https://openalex.org/W3196320218","https://openalex.org/W3205717712","https://openalex.org/W4200091031","https://openalex.org/W4246166885","https://openalex.org/W4318328346","https://openalex.org/W4321446270","https://openalex.org/W4367684987","https://openalex.org/W4376456412","https://openalex.org/W4381894488","https://openalex.org/W6731603714","https://openalex.org/W6751349269","https://openalex.org/W6756195904"],"related_works":["https://openalex.org/W3020739840","https://openalex.org/W2913998709","https://openalex.org/W2128640905","https://openalex.org/W2983282793","https://openalex.org/W2011460173","https://openalex.org/W1973046741","https://openalex.org/W2778498407","https://openalex.org/W2021092055","https://openalex.org/W4381894488","https://openalex.org/W4386875822"],"abstract_inverted_index":{"In":[0],"order":[1],"to":[2,5,37,93,99,125,166,183],"come":[3],"close":[4],"peak":[6],"performance,":[7],"accelerators":[8],"like":[9],"GPUs":[10,170,223],"require":[11],"significant":[12,39],"architecture-specific":[13],"tuning":[14],"that":[15],"understand":[16],"the":[17,27,46,51,76,100,118,138,147,159,180,185,195,205,225],"availability":[18],"of":[19,29,41,117,128,140,149,187],"shared":[20],"memory,":[21],"parallelism,":[22],"tensor":[23],"cores,":[24],"etc.":[25],"Unfortunately,":[26],"pursuit":[28],"higher":[30],"performance":[31,54,90,127,216],"and":[32,108,146,151,177,221],"lower":[33],"costs":[34],"have":[35],"led":[36],"a":[38,64,69,83,89,122],"diversification":[40],"architecture":[42,71],"designs,":[43],"even":[44],"from":[45,175],"same":[47,226],"vendor.":[48],"This":[49],"creates":[50],"need":[52],"for":[53,61,132],"portability":[55],"across":[56],"different":[57,84],"GPUs,":[58],"especially":[59],"important":[60],"programs":[62,131],"in":[63,72],"particular":[65],"programming":[66],"model":[67],"with":[68,191],"certain":[70],"mind.":[73],"Even":[74],"when":[75],"program":[77,181],"can":[78],"be":[79],"seamlessly":[80],"executed":[81],"on":[82,204],"architecture,":[85],"it":[86,94,154],"may":[87],"suffer":[88],"penalty":[91],"due":[92],"not":[95,112],"being":[96],"sized":[97],"appropriately":[98],"available":[101],"hardware":[102],"resources":[103,153],"such":[104],"as":[105,213,215],"fast":[106],"memory":[107,150],"registers,":[109],"let":[110],"alone":[111],"using":[113],"newer":[114],"advanced":[115],"features":[116],"architecture.":[119],"We":[120],"propose":[121],"new":[123],"approach":[124,199],"improving":[126],"(legacy)":[129],"CUDA":[130,176,211,227],"modern":[133],"machines":[134],"by":[135,171,194],"automatically":[136],"adjusting":[137],"amount":[139,148],"work":[141],"each":[142],"parallel":[143],"thread":[144],"does,":[145],"register":[152],"requires.":[155],"By":[156],"operating":[157],"within":[158],"MLIR":[160],"compiler":[161],"infrastructure,":[162],"we":[163],"are":[164],"able":[165],"also":[167],"target":[168,188],"AMD":[169,222],"performing":[172],"automatic":[173],"translation":[174],"simultaneously":[178],"adjust":[179],"granularity":[182],"fit":[184],"size":[186],"GPUs.":[189],"Combined":[190],"autotuning":[192],"assisted":[193],"platform-specific":[196],"compiler,":[197],"our":[198],"demonstrates":[200],"27%":[201],"geomean":[202],"speedup":[203],"Rodinia":[206],"benchmark":[207],"suite":[208],"over":[209],"baseline":[210],"implementation":[212],"well":[214],"parity":[217],"between":[218],"similar":[219],"NVIDIA":[220],"executing":[224],"program.":[228]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
