{"id":"https://openalex.org/W4381785750","doi":"https://doi.org/10.1109/tc.2023.3288758","title":"HAOTuner: A Hardware Adaptive Operator Auto-Tuner for Dynamic Shape Tensor Compilers","display_name":"HAOTuner: A Hardware Adaptive Operator Auto-Tuner for Dynamic Shape Tensor Compilers","publication_year":2023,"publication_date":"2023-06-23","ids":{"openalex":"https://openalex.org/W4381785750","doi":"https://doi.org/10.1109/tc.2023.3288758"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2023.3288758","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2023.3288758","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092246483","display_name":"Pengyu Mu","orcid":"https://orcid.org/0000-0002-4847-8148"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengyu Mu","raw_affiliation_strings":["Hangzhou Innovation Institute, Beihang University, Hangzhou, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330624","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0003-1829-2817"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100431390","display_name":"Rui Wang","orcid":"https://orcid.org/0000-0003-2741-6033"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Wang","raw_affiliation_strings":["Hangzhou Innovation Institute, Beihang University, Hangzhou, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Innovation Institute, Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101656256","display_name":"Guoxiang Liu","orcid":"https://orcid.org/0009-0001-6163-2074"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoxiang Liu","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727861","display_name":"Zhonghao Sun","orcid":"https://orcid.org/0009-0009-1098-2451"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhonghao Sun","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018705589","display_name":"Hailong Yang","orcid":"https://orcid.org/0000-0003-1101-7927"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hailong Yang","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074183877","display_name":"Zhongzhi Luan","orcid":"https://orcid.org/0000-0002-7186-0556"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongzhi Luan","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079362609","display_name":"Depei Qian","orcid":"https://orcid.org/0000-0002-5382-1473"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Depei Qian","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5092246483"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":2.7721,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.90562186,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"72","issue":"11","first_page":"3178","last_page":"3190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7887614965438843},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6552242040634155},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5934516787528992},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5887824296951294},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.5476977229118347},{"id":"https://openalex.org/keywords/tuner","display_name":"Tuner","score":0.5377979278564453},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5370961427688599},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5042823553085327},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4029635190963745},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3535389304161072},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2531129717826843},{"id":"https://openalex.org/keywords/radio-frequency","display_name":"Radio frequency","score":0.12218788266181946},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10664486885070801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7887614965438843},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6552242040634155},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5934516787528992},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5887824296951294},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.5476977229118347},{"id":"https://openalex.org/C9819579","wikidata":"https://www.wikidata.org/wiki/Q1544018","display_name":"Tuner","level":3,"score":0.5377979278564453},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5370961427688599},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5042823553085327},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4029635190963745},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3535389304161072},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2531129717826843},{"id":"https://openalex.org/C74064498","wikidata":"https://www.wikidata.org/wiki/Q3396184","display_name":"Radio frequency","level":2,"score":0.12218788266181946},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10664486885070801},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2023.3288758","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2023.3288758","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5774836301","display_name":null,"funder_award_id":"62072018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6322973505","display_name":null,"funder_award_id":"61732002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8014835415","display_name":null,"funder_award_id":"U22A2028","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8863380278","display_name":null,"funder_award_id":"62076168","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2122838776","https://openalex.org/W2193413348","https://openalex.org/W2295598076","https://openalex.org/W2768348081","https://openalex.org/W2804032941","https://openalex.org/W2896457183","https://openalex.org/W2898106867","https://openalex.org/W2961619211","https://openalex.org/W2979365412","https://openalex.org/W3001542287","https://openalex.org/W3008788679","https://openalex.org/W3034107927","https://openalex.org/W3094502228","https://openalex.org/W3096395190","https://openalex.org/W3122286897","https://openalex.org/W3161395920","https://openalex.org/W3174086521","https://openalex.org/W3181316986","https://openalex.org/W3207748736","https://openalex.org/W4221155291","https://openalex.org/W4286900001","https://openalex.org/W4302296459","https://openalex.org/W4385245566","https://openalex.org/W6637151318","https://openalex.org/W6687566353","https://openalex.org/W6739901393","https://openalex.org/W6745609711","https://openalex.org/W6751349269","https://openalex.org/W6755207826","https://openalex.org/W6767220000","https://openalex.org/W6772040440","https://openalex.org/W6779627212","https://openalex.org/W6779728309","https://openalex.org/W6784333009","https://openalex.org/W6798295750","https://openalex.org/W6802856854","https://openalex.org/W6803128811","https://openalex.org/W6809366197","https://openalex.org/W6893864439"],"related_works":["https://openalex.org/W2154107929","https://openalex.org/W4290057712","https://openalex.org/W2168846948","https://openalex.org/W2559413996","https://openalex.org/W2559844932","https://openalex.org/W2185884583","https://openalex.org/W2096827166","https://openalex.org/W2108953480","https://openalex.org/W2549354931","https://openalex.org/W2155442923"],"abstract_inverted_index":{"Deep":[0],"learning":[1,93],"compilers":[2],"with":[3,241],"auto-tuners":[4,42],"have":[5,115],"the":[6,18,30,45,74,104,109,118,121,130,138,144,160,192,214,238],"ability":[7],"to":[8,53,153,172,175,187],"generate":[9],"high-performance":[10],"programs,":[11],"particularly":[12],"tensor":[13,22,31,46,62,139,218],"programs":[14,23,47],"on":[15,70,137,143,195,202],"accelerators.":[16],"However,":[17],"performance":[19,75],"of":[20,39,106,111,120,132,191,206,222,228,245,251],"these":[21,85],"is":[24,33,68,170],"shape-sensitive":[25],"and":[26,56,114,230,253],"hardware":[27,146,173,178,197],"resource-sensitive.":[28],"When":[29],"shape":[32,100,217],"only":[34,136],"known":[35],"at":[36],"runtime":[37],"instead":[38],"compile":[40],"time,":[41],"must":[43],"tune":[44],"for":[48,65,98],"every":[49],"possible":[50],"shape,":[51],"leading":[52],"significant":[54],"time":[55,224,232,247,255],"cost":[57,167,193],"overhead.":[58],"Additionally,":[59],"if":[60],"a":[61,71,90,166,183],"program":[63],"tuned":[64],"one":[66],"device":[67],"deployed":[69],"different":[72,196,204],"device,":[73],"may":[76],"not":[77,135],"be":[78],"as":[79,81,108,157],"optimal":[80],"before.":[82],"To":[83],"address":[84],"challenges,":[86],"we":[87,128,149,181],"propose":[88],"HAOTuner,":[89,127],"hardware-adaptive":[91],"deep":[92],"operator":[94],"auto-tuner":[95,219],"specifically":[96],"designed":[97],"dynamic":[99,216],"tensors.":[101],"We":[102,163,199],"leverage":[103],"concept":[105],"micro-kernels":[107,133,156],"unit":[110],"task":[112],"allocation":[113],"observed":[116],"that":[117,169,211],"size":[119,131],"micro-kernel":[122],"greatly":[123],"impacts":[124],"performance.":[125],"In":[126],"determine":[129],"based":[134],"shapes":[140],"but":[141],"also":[142,164],"available":[145],"resources.":[147],"Specifically,":[148],"present":[150],"an":[151,226,249],"algorithm":[152],"select":[154],"hardware-friendly":[155],"candidates,":[158],"reducing":[159],"tuning":[161,231,254],"time.":[162],"design":[165],"model":[168,184,194],"sensitive":[171],"resources":[174],"support":[176],"various":[177],"architectures.":[179],"Furthermore,":[180],"provide":[182],"transfer":[185],"solution":[186],"enable":[188],"fast":[189],"deployment":[190],"platforms.":[198],"evaluate":[200],"HAOTuner":[201,212,236],"six":[203],"types":[205],"GPUs.":[207],"The":[208],"experiments":[209],"demonstrate":[210],"surpasses":[213],"state-of-the-art":[215,239],"in":[220,243],"terms":[221,244],"running":[223,246],"by":[225,233,248,256],"average":[227,250],"26%":[229],"25%.":[234],"Moreover,":[235],"outperforms":[237],"compiler":[240],"padding":[242],"39%":[252],"6\u00d7.":[257]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
