{"id":"https://openalex.org/W4399282407","doi":"https://doi.org/10.1145/3650200.3656626","title":"Accelerated Auto-Tuning of GPU Kernels for Tensor Computations","display_name":"Accelerated Auto-Tuning of GPU Kernels for Tensor Computations","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399282407","doi":"https://doi.org/10.1145/3650200.3656626"},"language":"en","primary_location":{"id":"doi:10.1145/3650200.3656626","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650200.3656626","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3650200.3656626","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3650200.3656626","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028785761","display_name":"Chendi Li","orcid":"https://orcid.org/0009-0000-2610-042X"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chendi Li","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015151039","display_name":"Yufan Xu","orcid":"https://orcid.org/0000-0002-7787-6460"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yufan Xu","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063647877","display_name":"Sina Mahdipour Saravani","orcid":"https://orcid.org/0000-0003-4285-1439"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sina Mahdipour Saravani","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027517817","display_name":"P. Sadayappan","orcid":"https://orcid.org/0000-0002-4737-2034"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ponnuswamy Sadayappan","raw_affiliation_strings":["University of Utah, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Utah, United States of America","institution_ids":["https://openalex.org/I223532165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5028785761"],"corresponding_institution_ids":["https://openalex.org/I223532165"],"apc_list":null,"apc_paid":null,"fwci":2.9365,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.91933467,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"549","last_page":"561"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6954851150512695},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6014497876167297},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5926576256752014},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5297715663909912},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5189756751060486},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4183169901371002},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.33432474732398987},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.25815829634666443},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17723897099494934},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.11942607164382935}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6954851150512695},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6014497876167297},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5926576256752014},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5297715663909912},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5189756751060486},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4183169901371002},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.33432474732398987},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25815829634666443},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17723897099494934},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.11942607164382935}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3650200.3656626","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650200.3656626","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3650200.3656626","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th ACM International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3650200.3656626","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650200.3656626","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3650200.3656626","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399282407.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W2055312318","https://openalex.org/W2077143534","https://openalex.org/W2100218206","https://openalex.org/W2158626113","https://openalex.org/W2194775991","https://openalex.org/W2295598076","https://openalex.org/W2570343428","https://openalex.org/W2898099374","https://openalex.org/W2949967139","https://openalex.org/W3174529902","https://openalex.org/W3208285274","https://openalex.org/W4212986322","https://openalex.org/W4221062244","https://openalex.org/W4292975105","https://openalex.org/W4318256790","https://openalex.org/W4318328313","https://openalex.org/W4318541553","https://openalex.org/W4361806118","https://openalex.org/W4377710416","https://openalex.org/W4391623948"],"related_works":["https://openalex.org/W2163816448","https://openalex.org/W2327123731","https://openalex.org/W2009665355","https://openalex.org/W185782823","https://openalex.org/W1429949169","https://openalex.org/W2027201655","https://openalex.org/W2095734710","https://openalex.org/W2168586703","https://openalex.org/W1656096860","https://openalex.org/W2028469001"],"abstract_inverted_index":{"TVM":[0],"is":[1,29],"a":[2,64],"state-of-the-art":[3],"auto-tuning":[4,40,79],"compiler":[5],"for":[6],"the":[7,20,55,83],"synthesis":[8],"of":[9,12,26,66,86],"high-performance":[10,34],"implementations":[11],"tensor":[13],"computations.":[14],"However,":[15],"an":[16,75],"extensive":[17],"search":[18],"in":[19,78],"vast":[21],"design":[22,49],"space":[23,50],"via":[24],"thousands":[25],"compile-execute":[27],"trials":[28],"often":[30],"needed":[31],"to":[32,38,53,81],"identify":[33],"code":[35,56,87],"versions,":[36],"leading":[37],"high":[39],"time.":[41],"This":[42],"paper":[43],"develops":[44],"new":[45],"performance":[46],"modeling":[47],"and":[48,69],"exploration":[51],"strategies":[52],"accelerate":[54],"optimization":[57],"process":[58],"within":[59],"TVM.":[60],"Experimental":[61],"evaluation":[62],"on":[63],"number":[65],"matrix-matrix":[67],"multiplication":[68],"2D":[70],"convolution":[71],"kernels":[72],"demonstrates":[73],"about":[74],"order-of-magnitude":[76],"improvement":[77],"time":[80],"achieve":[82],"same":[84],"level":[85],"performance.":[88]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
