{"id":"https://openalex.org/W4416155381","doi":"https://doi.org/10.1145/3712285.3759898","title":"Constraint-Driven Auto-Tuning of GEMM-like Operators for MT-3000 Many-core Processor","display_name":"Constraint-Driven Auto-Tuning of GEMM-like Operators for MT-3000 Many-core Processor","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416155381","doi":"https://doi.org/10.1145/3712285.3759898"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759898","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759898","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065038356","display_name":"Xinxin Qi","orcid":"https://orcid.org/0000-0001-8316-2934"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinxin Qi","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083171604","display_name":"Jianbin Fang","orcid":"https://orcid.org/0000-0003-3542-4869"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianbin Fang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092019932","display_name":"Peng Zhang","orcid":"https://orcid.org/0000-0001-8364-9793"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Zhang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008369819","display_name":"Yonggang Che","orcid":"https://orcid.org/0000-0001-6906-4940"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yonggang Che","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023610301","display_name":"Jie Ren","orcid":"https://orcid.org/0000-0003-3183-7228"},"institutions":[{"id":"https://openalex.org/I88830068","display_name":"Shaanxi Normal University","ror":"https://ror.org/0170z8493","country_code":"CN","type":"education","lineage":["https://openalex.org/I88830068"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Ren","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Shaanxi Normal University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Shaanxi Normal University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I88830068"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5065038356"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":5.1011,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95771805,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"185","last_page":"199"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8575000166893005,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8575000166893005,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.05310000106692314,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.016499999910593033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5572999715805054},{"id":"https://openalex.org/keywords/constraint-programming","display_name":"Constraint programming","score":0.4781999886035919},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4596000015735626},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.42910000681877136},{"id":"https://openalex.org/keywords/register-allocation","display_name":"Register allocation","score":0.4228000044822693},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.4004000127315521},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.3896999955177307},{"id":"https://openalex.org/keywords/transpose","display_name":"Transpose","score":0.3537999987602234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7919999957084656},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5572999715805054},{"id":"https://openalex.org/C173404611","wikidata":"https://www.wikidata.org/wiki/Q528588","display_name":"Constraint programming","level":3,"score":0.4781999886035919},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4596000015735626},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C128916667","wikidata":"https://www.wikidata.org/wiki/Q1343660","display_name":"Register allocation","level":3,"score":0.4228000044822693},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.4004000127315521},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3896999955177307},{"id":"https://openalex.org/C200106649","wikidata":"https://www.wikidata.org/wiki/Q223683","display_name":"Transpose","level":3,"score":0.3537999987602234},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35179999470710754},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3476000130176544},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.3472999930381775},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34610000252723694},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C56086750","wikidata":"https://www.wikidata.org/wiki/Q6042592","display_name":"Integer programming","level":2,"score":0.34549999237060547},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3125999867916107},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.30309998989105225},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.29330000281333923},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2784999907016754},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C134765980","wikidata":"https://www.wikidata.org/wiki/Q879126","display_name":"Bitwise operation","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2605000138282776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759898","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759898","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2048558763","https://openalex.org/W2063123613","https://openalex.org/W2084379367","https://openalex.org/W2110612452","https://openalex.org/W2194775991","https://openalex.org/W2302330221","https://openalex.org/W2618530766","https://openalex.org/W2728256789","https://openalex.org/W2963037989","https://openalex.org/W2973166032","https://openalex.org/W3016842236","https://openalex.org/W3119880013","https://openalex.org/W3141650078","https://openalex.org/W3177452048","https://openalex.org/W3192336523","https://openalex.org/W4281390154","https://openalex.org/W4281932226","https://openalex.org/W4296824640","https://openalex.org/W4327930463","https://openalex.org/W4378227035","https://openalex.org/W4394998528","https://openalex.org/W4400411561"],"related_works":[],"abstract_inverted_index":{"Optimizing":[0],"deep":[1],"learning":[2],"(DL)":[3],"operators,":[4,68],"particularly":[5],"GEMM-like":[6],"operations,":[7],"for":[8,58,134,164],"emerging":[9],"heterogeneous":[10,165],"many-core":[11,166],"processors":[12],"like":[13],"MT-3000":[14],"is":[15],"challenging":[16],"due":[17,46],"to":[18,40,47,71,112],"the":[19,62,81,86,135],"large":[20],"search":[21,49,87],"space":[22],"and":[23,64,75,119,146],"hardware-specific":[24],"constraints.":[25],"Existing":[26],"approaches":[27],"-":[28,36],"such":[29],"as":[30],"hand-crafted":[31],"libraries":[32],"or":[33,42],"general-purpose":[34],"auto-tuners":[35],"are":[37],"either":[38],"expensive":[39,48],"develop":[41],"deliver":[43],"sub-optimal":[44],"performance":[45,170],"overheads.":[50],"We":[51],"present":[52],"DynaChain,":[53],"an":[54,105],"operator-level":[55],"optimization":[56],"framework":[57],"MT-3000.":[59],"DynaChain":[60,89,127,160],"decouples":[61],"computation":[63],"data":[65,78],"movement":[66],"of":[67,154],"allowing":[69],"each":[70],"be":[72],"optimized":[73,133],"independently":[74],"maximizing":[76],"global":[77],"reuse":[79],"across":[80],"operator":[82],"schedule.":[83],"To":[84],"reduce":[85],"space,":[88],"introduces":[90],"constraint":[91],"dependency":[92],"chains":[93],"that":[94,159],"dynamically":[95],"eliminate":[96],"invalid":[97],"scheduling":[98],"options":[99],"during":[100],"exploration.":[101],"It":[102],"then":[103],"applies":[104],"integer":[106],"linear":[107],"programming":[108],"(ILP)":[109],"based":[110],"decomposition":[111],"handle":[113],"irregular":[114,140],"matrix":[115],"dimensions,":[116],"avoiding":[117],"padding":[118],"improving":[120],"hardware":[121],"utilization.":[122],"For":[123],"low-level":[124],"code":[125],"generation,":[126],"offers":[128],"a":[129,152],"hardware-aware":[130],"micro-kernel":[131],"design":[132],"MT-3000\u2019s":[136],"VLIW+SIMD":[137],"architecture,":[138],"supporting":[139],"operations":[141],"through":[142],"improved":[143],"register":[144],"allocation":[145],"instruction":[147],"pipelining.":[148],"Experimental":[149],"results":[150],"on":[151,171],"range":[153],"representative":[155],"DL":[156],"operators":[157],"demonstrate":[158],"simplifies":[161],"kernel":[162],"development":[163],"architectures":[167],"while":[168],"delivering":[169],"par":[172],"with":[173],"expert-optimized":[174],"libraries.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-12T00:00:00"}
