{"id":"https://openalex.org/W6959452492","doi":"https://doi.org/10.1109/tpds.2025.3587673","title":"High Performance OpenCL-Based GEMM Kernel Auto-Tuned by Bayesian Optimization","display_name":"High Performance OpenCL-Based GEMM Kernel Auto-Tuned by Bayesian Optimization","publication_year":2025,"publication_date":"2025-07-10","ids":{"openalex":"https://openalex.org/W6959452492","doi":"https://doi.org/10.1109/tpds.2025.3587673"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2025.3587673","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3587673","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shengle Lin","orcid":"https://orcid.org/0000-0003-3329-0924"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shengle Lin","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guoqing Xiao","orcid":"https://orcid.org/0000-0001-5008-4829"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqing Xiao","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haotian Wang","orcid":"https://orcid.org/0000-0002-0086-6301"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haotian Wang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wangdong Yang","orcid":"https://orcid.org/0000-0003-2681-7898"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangdong Yang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kenli Li","orcid":"https://orcid.org/0000-0002-2635-7716"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kenli Li","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","College of Computer Science and Electronic Engineering, Hunan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Hunan, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"last","author":{"id":null,"display_name":"Keqin Li","orcid":"https://orcid.org/0000-0001-5224-4048"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keqin Li","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","College of Computer Science and Electronic Engineering, Hunan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Hunan, China","institution_ids":["https://openalex.org/I16609230"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I16609230"],"apc_list":null,"apc_paid":null,"fwci":1.6633,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89101521,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"9","first_page":"1985","last_page":"1997"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10733","display_name":"Wheat and Barley Genetics and Pathology","score":0.7774999737739563,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10733","display_name":"Wheat and Barley Genetics and Pathology","score":0.7774999737739563,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11229","display_name":"Genetics and Plant Breeding","score":0.04650000110268593,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10012","display_name":"Genetic diversity and population structure","score":0.02449999935925007,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.8391000032424927},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7204999923706055},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6726999878883362},{"id":"https://openalex.org/keywords/tuner","display_name":"Tuner","score":0.6284000277519226},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.45739999413490295},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.42410001158714294},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.38530001044273376},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.3571000099182129}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.890999972820282},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.8391000032424927},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7204999923706055},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6726999878883362},{"id":"https://openalex.org/C9819579","wikidata":"https://www.wikidata.org/wiki/Q1544018","display_name":"Tuner","level":3,"score":0.6284000277519226},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5579000115394592},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.45739999413490295},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3919000029563904},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3758000135421753},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.296099990606308},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2669000029563904},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.2646999955177307},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.2538999915122986},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3587673","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3587673","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1338791563","display_name":null,"funder_award_id":"2024JJ2026","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"},{"id":"https://openalex.org/G4221925008","display_name":null,"funder_award_id":"CSTB2022NSCQ-MSX1213","funder_id":"https://openalex.org/F4320323172","funder_display_name":"Natural Science Foundation of Chongqing"},{"id":"https://openalex.org/G5775198436","display_name":null,"funder_award_id":"2023GK2002","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"}],"funders":[{"id":"https://openalex.org/F4320322866","display_name":"Natural Science Foundation of Hainan Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323172","display_name":"Natural Science Foundation of Chongqing","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W74931915","https://openalex.org/W182691100","https://openalex.org/W1510052597","https://openalex.org/W1781197125","https://openalex.org/W1969513389","https://openalex.org/W1978642402","https://openalex.org/W2010936531","https://openalex.org/W2012843882","https://openalex.org/W2023930909","https://openalex.org/W2059745360","https://openalex.org/W2062368747","https://openalex.org/W2064872546","https://openalex.org/W2065493976","https://openalex.org/W2079658918","https://openalex.org/W2100218206","https://openalex.org/W2107911628","https://openalex.org/W2192203593","https://openalex.org/W2232645663","https://openalex.org/W2252007067","https://openalex.org/W2615663338","https://openalex.org/W2913790721","https://openalex.org/W2961614712","https://openalex.org/W2999925821","https://openalex.org/W3007718266","https://openalex.org/W3039715620","https://openalex.org/W3091741474","https://openalex.org/W3121665290","https://openalex.org/W3131110481","https://openalex.org/W3159436656","https://openalex.org/W3176750915","https://openalex.org/W3205436637","https://openalex.org/W4205588253","https://openalex.org/W4211049957","https://openalex.org/W4221000689","https://openalex.org/W4285503858","https://openalex.org/W4321764662","https://openalex.org/W4381327316","https://openalex.org/W4384616347","https://openalex.org/W4385429973","https://openalex.org/W6675200109","https://openalex.org/W6780740362"],"related_works":[],"abstract_inverted_index":{"OpenCL":[0,54,120],"has":[1],"become":[2],"the":[3,38,123],"favored":[4],"framework":[5],"for":[6,105],"emerging":[7],"heterogeneous":[8],"devices":[9],"and":[10,16,114,129],"FPGAs,":[11],"owing":[12],"to":[13,87],"its":[14],"versatility":[15],"portability.":[17],"However,":[18],"OpenCL-based":[19],"math":[20],"libraries":[21],"still":[22],"face":[23],"challenges":[24],"in":[25],"fully":[26],"leveraging":[27],"device":[28,89],"performance.":[29],"When":[30],"deploying":[31],"high-performance":[32],"arithmetic":[33],"applications":[34],"on":[35,118],"these":[36],"devices,":[37],"most":[39],"important":[40],"hot":[41],"function":[42],"is":[43],"General":[44],"Matrix-matrix":[45],"Multiplication":[46],"(GEMM).":[47],"This":[48],"study":[49],"presents":[50,99],"a":[51,66,80,100],"meticulously":[52],"optimized":[53],"GEMM":[55,59],"kernel.":[56],"Our":[57],"enhanced":[58],"kernel":[60,106],"emphasizes":[61],"two":[62],"key":[63],"improvements:":[64],"1)":[65],"three-level":[67],"double":[68],"buffer":[69],"pipeline":[70],"that":[71],"efficiently":[72],"overlaps":[73],"data":[74],"fetching":[75],"with":[76],"floating-point":[77],"computations;":[78],"2)":[79],"fine-grained":[81],"prefetching":[82],"strategy":[83],"of":[84],"private":[85],"memory":[86],"increase":[88],"occupancy":[90],"by":[91],"optimizing":[92],"register":[93],"unit":[94],"utilization.":[95],"Furthermore,":[96],"this":[97],"work":[98],"Bayesian":[101],"Optimization":[102],"(BO)":[103],"tuner":[104,125],"auto-tuning.":[107],"Experimental":[108],"results":[109],"demonstrate":[110],"considerable":[111],"optimization":[112],"improvement":[113],"performance":[115],"advantages":[116],"achieved":[117],"diverse":[119],"devices.":[121],"Additionally,":[122],"BO":[124],"demonstrates":[126],"superior":[127],"efficiency":[128],"robustness,":[130],"outperforming":[131],"contemporary":[132],"tuning":[133],"methods.":[134]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
