{"id":"https://openalex.org/W4394998528","doi":"https://doi.org/10.1145/3620665.3640390","title":"Optimizing Dynamic-Shape Neural Networks on Accelerators via On-the-Fly Micro-Kernel Polymerization","display_name":"Optimizing Dynamic-Shape Neural Networks on Accelerators via On-the-Fly Micro-Kernel Polymerization","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998528","doi":"https://doi.org/10.1145/3620665.3640390"},"language":"en","primary_location":{"id":"doi:10.1145/3620665.3640390","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640390","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620665.3640390","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102983574","display_name":"Feng Yu","orcid":"https://orcid.org/0009-0004-0974-0512"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Feng Yu","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-0974-0512","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100607669","display_name":"Guangli Li","orcid":"https://orcid.org/0000-0002-9738-261X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["AU","CN"],"is_corresponding":false,"raw_author_name":"Guangli Li","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","University of New South Wales, Sydney, Australia","Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China University of New South Wales, Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0002-9738-261X","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I31746571","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017298602","display_name":"Jiacheng Zhao","orcid":"https://orcid.org/0000-0001-5228-8972"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiacheng Zhao","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","Zhongguancun Laboratory, Beijing, China","Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China Zhongguancun Laboratory, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5228-8972","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Zhongguancun Laboratory, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China Zhongguancun Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086633294","display_name":"Huimin Cui","orcid":"https://orcid.org/0000-0002-2491-7679"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huimin Cui","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2491-7679","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053070701","display_name":"Xiaobing Feng","orcid":"https://orcid.org/0000-0003-2909-7750"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Feng","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China","Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2909-7750","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024664385","display_name":"Jingling Xue","orcid":"https://orcid.org/0000-0003-0380-3506"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jingling Xue","raw_affiliation_strings":["University of New South Wales, Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0380-3506","affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102983574"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":5.2179,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.96465464,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"797","last_page":"812"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9519000053405762,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8187246322631836},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6710679531097412},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6632794737815857},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5746145844459534},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5632686614990234},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4543955624103546},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.34778153896331787},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.33394238352775574},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.33291375637054443},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28556376695632935},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21000272035598755},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07831227779388428}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187246322631836},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6710679531097412},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6632794737815857},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5746145844459534},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5632686614990234},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4543955624103546},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34778153896331787},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.33394238352775574},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.33291375637054443},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28556376695632935},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21000272035598755},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07831227779388428},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620665.3640390","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640390","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620665.3640390","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640390","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.6700000166893005,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1989434931","display_name":null,"funder_award_id":"2023M733566","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G4039879177","display_name":null,"funder_award_id":"U23B2020","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4478266629","display_name":null,"funder_award_id":"62090024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7223888118","display_name":null,"funder_award_id":"62302479","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8136545603","display_name":null,"funder_award_id":"62232015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W2097117768","https://openalex.org/W2099001231","https://openalex.org/W2163605009","https://openalex.org/W2172654076","https://openalex.org/W2193413348","https://openalex.org/W2194775991","https://openalex.org/W2515287984","https://openalex.org/W2559655401","https://openalex.org/W2594258618","https://openalex.org/W2595551253","https://openalex.org/W2604272474","https://openalex.org/W2613718673","https://openalex.org/W2622263826","https://openalex.org/W2734941459","https://openalex.org/W2773689216","https://openalex.org/W2780077279","https://openalex.org/W2793888044","https://openalex.org/W2804032941","https://openalex.org/W2805566098","https://openalex.org/W2896457183","https://openalex.org/W2949967139","https://openalex.org/W2963563691","https://openalex.org/W2965373594","https://openalex.org/W2975059944","https://openalex.org/W2978017171","https://openalex.org/W2981758446","https://openalex.org/W2999905431","https://openalex.org/W3012249773","https://openalex.org/W3034107927","https://openalex.org/W3035582633","https://openalex.org/W3097841484","https://openalex.org/W3098220359","https://openalex.org/W3098303312","https://openalex.org/W3118616291","https://openalex.org/W3122286897","https://openalex.org/W3123054690","https://openalex.org/W3130554079","https://openalex.org/W3130716829","https://openalex.org/W3156745629","https://openalex.org/W3157657667","https://openalex.org/W3161395920","https://openalex.org/W3174529902","https://openalex.org/W3177452048","https://openalex.org/W3206343543","https://openalex.org/W3208285274","https://openalex.org/W4212986322","https://openalex.org/W4214512541","https://openalex.org/W4239088979","https://openalex.org/W4286900001","https://openalex.org/W4308083739","https://openalex.org/W4312414053","https://openalex.org/W4313015712","https://openalex.org/W4318541538","https://openalex.org/W4320067926","https://openalex.org/W4381785750","https://openalex.org/W4384705353","https://openalex.org/W4389576338","https://openalex.org/W6754777574","https://openalex.org/W6778883912","https://openalex.org/W6785429063","https://openalex.org/W6893864439"],"related_works":["https://openalex.org/W4240253816","https://openalex.org/W3096456556","https://openalex.org/W2169584677","https://openalex.org/W2979513934","https://openalex.org/W4232954277","https://openalex.org/W2020341030","https://openalex.org/W2749133591","https://openalex.org/W2367473450","https://openalex.org/W23346600","https://openalex.org/W4391382578"],"abstract_inverted_index":{"In":[0],"recent":[1],"times,":[2],"dynamic-shape":[3,62,71,115,139],"neural":[4,63,118],"networks":[5,119],"have":[6],"gained":[7],"widespread":[8],"usage":[9],"in":[10,19,36],"intelligent":[11],"applications":[12],"to":[13,24],"address":[14,53],"complex":[15],"tasks,":[16],"introducing":[17],"challenges":[18],"optimizing":[20],"tensor":[21,50,72,101],"programs":[22],"due":[23],"their":[25],"dynamic":[26,37],"nature.":[27],"As":[28],"the":[29,39,45,54,97,108],"operators'":[30],"shapes":[31],"are":[32],"determined":[33],"at":[34,104],"runtime":[35],"scenarios,":[38],"compilation":[40],"process":[41],"becomes":[42],"expensive,":[43],"limiting":[44],"practicality":[46],"of":[47,61,99,110,146],"existing":[48],"static-shape":[49],"compilers.":[51],"To":[52],"need":[55],"for":[56],"effective":[57],"and":[58,117,128],"efficient":[59],"optimization":[60,82],"networks,":[64],"this":[65],"paper":[66],"introduces":[67],"MikPoly,":[68],"a":[69,80,91,100],"novel":[70],"compiler":[73],"based":[74,95],"on":[75,96,120],"micro-kernel":[76],"polymerization.":[77],"MikPoly":[78,111,136],"employs":[79],"two-stage":[81],"approach,":[83],"dynamically":[84],"combining":[85],"multiple":[86],"statically":[87],"generated":[88],"micro-kernels":[89],"using":[90],"lightweight":[92],"cost":[93],"model":[94],"shape":[98],"operator":[102],"known":[103],"runtime.":[105],"We":[106],"evaluate":[107],"effectiveness":[109],"by":[112],"employing":[113],"popular":[114],"operators":[116],"two":[121],"representative":[122],"accelerators,":[123],"namely":[124],"GPU":[125],"Tensor":[126],"Cores":[127],"Ascend":[129],"NPUs.":[130],"Our":[131],"experimental":[132],"results":[133],"demonstrate":[134],"that":[135],"effectively":[137],"optimizes":[138],"workloads,":[140],"yielding":[141],"an":[142],"average":[143],"performance":[144],"improvement":[145],"1.49\u00d7":[147],"over":[148],"state-of-the-art":[149],"vendor":[150],"libraries.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":8}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
