{"id":"https://openalex.org/W4360831828","doi":"https://doi.org/10.1109/hpca56546.2023.10071018","title":"Chimera: An Analytical Optimizing Framework for Effective Compute-intensive Operators Fusion","display_name":"Chimera: An Analytical Optimizing Framework for Effective Compute-intensive Operators Fusion","publication_year":2023,"publication_date":"2023-02-01","ids":{"openalex":"https://openalex.org/W4360831828","doi":"https://doi.org/10.1109/hpca56546.2023.10071018"},"language":"en","primary_location":{"id":"doi:10.1109/hpca56546.2023.10071018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10071018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072186367","display_name":"Size Zheng","orcid":"https://orcid.org/0000-0002-9471-1780"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Size Zheng","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034391486","display_name":"Siyuan Chen","orcid":"https://orcid.org/0000-0002-3118-5963"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyuan Chen","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112641741","display_name":"Peidi Song","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peidi Song","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004459947","display_name":"Renze Chen","orcid":"https://orcid.org/0000-0001-5938-7965"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renze Chen","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663885","display_name":"Xiuhong Li","orcid":"https://orcid.org/0000-0002-5327-0907"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Xiuhong Li","raw_affiliation_strings":["Sensetime Research","Shanghai AI Lab"],"affiliations":[{"raw_affiliation_string":"Sensetime Research","institution_ids":[]},{"raw_affiliation_string":"Shanghai AI Lab","institution_ids":["https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049910854","display_name":"Shengen Yan","orcid":"https://orcid.org/0009-0005-3858-7972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shengen Yan","raw_affiliation_strings":["Sensetime Research"],"affiliations":[{"raw_affiliation_string":"Sensetime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010087030","display_name":"Dahua Lin","orcid":"https://orcid.org/0000-0002-8865-7896"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dahua Lin","raw_affiliation_strings":["The Chinese University of Hong Kong","Shanghai AI Lab"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Shanghai AI Lab","institution_ids":["https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003939279","display_name":"Jingwen Leng","orcid":"https://orcid.org/0000-0002-5660-5493"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Leng","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Peking University","Beijing Advanced Innovation Center for Integrated Circuits"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Beijing Advanced Innovation Center for Integrated Circuits","institution_ids":["https://openalex.org/I4210165198"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5072186367"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":4.5299,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.9595887,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1113","last_page":"1126"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7822362184524536},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.6320675015449524},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5455299615859985},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.46784815192222595},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.45095837116241455},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4166503846645355},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3723817467689514},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3607434630393982},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.33339112997055054},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15082502365112305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7822362184524536},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.6320675015449524},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5455299615859985},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.46784815192222595},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.45095837116241455},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4166503846645355},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3723817467689514},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3607434630393982},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.33339112997055054},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15082502365112305},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca56546.2023.10071018","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10071018","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320318547","display_name":"Baidu","ror":"https://ror.org/03vs3wt56"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1972783048","https://openalex.org/W2023415862","https://openalex.org/W2055312318","https://openalex.org/W2152517358","https://openalex.org/W2194775991","https://openalex.org/W2471164860","https://openalex.org/W2516525699","https://openalex.org/W2618530766","https://openalex.org/W2627042741","https://openalex.org/W2766166018","https://openalex.org/W2786320458","https://openalex.org/W2804032941","https://openalex.org/W2804500013","https://openalex.org/W2896457183","https://openalex.org/W2913790721","https://openalex.org/W2921898934","https://openalex.org/W2935331687","https://openalex.org/W2940126124","https://openalex.org/W2940862705","https://openalex.org/W2949967139","https://openalex.org/W2961619211","https://openalex.org/W2963037989","https://openalex.org/W2980104813","https://openalex.org/W2981758446","https://openalex.org/W3012249773","https://openalex.org/W3037768171","https://openalex.org/W3081486497","https://openalex.org/W3088415669","https://openalex.org/W3094502228","https://openalex.org/W3096395190","https://openalex.org/W3097841484","https://openalex.org/W3097965454","https://openalex.org/W3123054690","https://openalex.org/W3135807226","https://openalex.org/W3153174751","https://openalex.org/W3156745629","https://openalex.org/W3157506437","https://openalex.org/W3157657667","https://openalex.org/W3174529902","https://openalex.org/W3177452048","https://openalex.org/W3192336523","https://openalex.org/W3193212547","https://openalex.org/W3206719067","https://openalex.org/W3208285274","https://openalex.org/W4206792915","https://openalex.org/W4212986322","https://openalex.org/W4246166885","https://openalex.org/W4249932213","https://openalex.org/W4280562683","https://openalex.org/W4281932226","https://openalex.org/W4288083528","https://openalex.org/W4292779060","https://openalex.org/W4293584584","https://openalex.org/W4295312788","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6684191040","https://openalex.org/W6695314431","https://openalex.org/W6748324931","https://openalex.org/W6750227808","https://openalex.org/W6751349269","https://openalex.org/W6752057402","https://openalex.org/W6755207826","https://openalex.org/W6757717574","https://openalex.org/W6761772364","https://openalex.org/W6766978945","https://openalex.org/W6778883912","https://openalex.org/W6779607338","https://openalex.org/W6779728309","https://openalex.org/W6783460666","https://openalex.org/W6783774778","https://openalex.org/W6784333009","https://openalex.org/W6784999070","https://openalex.org/W6794125031","https://openalex.org/W6795140394","https://openalex.org/W6802671585"],"related_works":["https://openalex.org/W1555349535","https://openalex.org/W1556451512","https://openalex.org/W4234091740","https://openalex.org/W4213350282","https://openalex.org/W2230171082","https://openalex.org/W2583128298","https://openalex.org/W2369125128","https://openalex.org/W2022275305","https://openalex.org/W2134423494","https://openalex.org/W2029210135"],"abstract_inverted_index":{"Machine":[0],"learning":[1,38,97],"models":[2],"with":[3],"various":[4],"tensor":[5],"operators":[6,18,23,30,41,90],"are":[7,14,42,75,176,258],"becoming":[8],"ubiquitous":[9],"in":[10,19,45,94],"recent":[11],"years.":[12],"There":[13],"two":[15],"types":[16],"of":[17,53,70,140,155,158,226],"machine":[20,37,96],"learning:":[21],"compute-intensive":[22,40,72,89,108,141,151,222],"(e.g.,":[24,31],"GEMM":[25,228],"and":[26,28,33,60,80,104,174,230,236,246,263,268],"convolution)":[27],"memory-intensive":[29],"ReLU":[32],"softmax).":[34],"In":[35,148],"emerging":[36],"models,":[39],"usually":[43,118],"organized":[44],"a":[46,115,156],"chain":[47],"structure.":[48],"With":[49],"the":[50,55,68,138,167,183,190,256],"continual":[51],"specialization":[52],"hardware,":[54],"gap":[56],"between":[57],"computing":[58],"performance":[59,121],"memory":[61,78],"bandwidth":[62],"has":[63],"become":[64],"more":[65],"prominent.":[66],"Consequently,":[67],"implementations":[69],"many":[71],"operator":[73,109,124,142,152,168,223],"chains":[74,110,143,229,232],"bounded":[76],"by":[77,188],"bandwidth,":[79],"generating":[81],"fused":[82,164,219],"kernels":[83,165,208,220],"to":[84,209,243,249,253,260],"improve":[85,137],"locality":[86,139],"for":[87,107,122,166,171,213,221,265],"these":[88,123],"becomes":[91],"necessary.":[92],"But":[93],"existing":[95],"compilers,":[98,255],"there":[99],"lack":[100],"both":[101,172],"precise":[102],"analysis":[103],"efficient":[105,163],"optimization":[106],"on":[111,144,233],"different":[112,145,214],"accelerators.":[113,147,215],"As":[114],"result,":[116],"they":[117],"produce":[119],"sub-optimal":[120],"chains.In":[125],"this":[126],"paper,":[127],"we":[128],"propose":[129],"Chimera,":[130,149],"an":[131,197],"optimizing":[132],"framework":[133],"that":[134,239],"can":[135],"efficiently":[136],"hardware":[146],"each":[150],"is":[153],"composed":[154],"series":[157],"computation":[159],"blocks.":[160],"To":[161],"generate":[162],"chains,":[169],"optimizations":[170,212],"inter-block":[173,179],"intra-block":[175,201],"required.":[177],"For":[178,200],"optimization,":[180,202],"Chimera":[181,203,217,240],"decides":[182],"optimized":[184],"block":[185],"execution":[186],"order":[187],"minimizing":[189],"data":[191],"movement":[192],"volume":[193],"among":[194],"blocks":[195],"using":[196],"analytical":[198],"model.":[199],"uses":[204],"unified":[205],"replaceable":[206],"micro":[207],"apply":[210],"hardware-specific":[211],"Finally,":[216],"generates":[218],"chains.":[224],"Evaluation":[225],"batch":[227],"convolution":[231],"CPU,":[234,266],"GPU,":[235,267],"NPU":[237],"shows":[238],"achieves":[241],"up":[242,259],"2.87\u00d7,":[244],"2.29\u00d7,":[245,261],"2.39\u00d7":[247],"speedups":[248,257],"hand-tuned":[250],"libraries.":[251],"Compared":[252],"state-of-the-art":[254],"1.64\u00d7,":[262],"1.14\u00d7":[264],"NPU.":[269]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
