{"id":"https://openalex.org/W3123639931","doi":"https://doi.org/10.1109/cgo51591.2021.9370330","title":"UNIT: Unifying Tensorized Instruction Compilation","display_name":"UNIT: Unifying Tensorized Instruction Compilation","publication_year":2021,"publication_date":"2021-02-27","ids":{"openalex":"https://openalex.org/W3123639931","doi":"https://doi.org/10.1109/cgo51591.2021.9370330","mag":"3123639931"},"language":"en","primary_location":{"id":"doi:10.1109/cgo51591.2021.9370330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2101.08458","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102898841","display_name":"Jian Weng","orcid":"https://orcid.org/0000-0002-7933-9941"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jian Weng","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041306152","display_name":"Animesh Jain","orcid":"https://orcid.org/0000-0003-0250-3608"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Animesh Jain","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106290765","display_name":"Jie Wang","orcid":"https://orcid.org/0000-0002-9937-2351"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Wang","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070031492","display_name":"Leyuan Wang","orcid":"https://orcid.org/0000-0003-4084-4470"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leyuan Wang","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101638214","display_name":"Yida Wang","orcid":"https://orcid.org/0000-0001-8165-840X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006723758","display_name":"Tony Nowatzki","orcid":"https://orcid.org/0000-0001-8483-3824"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tony Nowatzki","raw_affiliation_strings":["University of California, Los Angeles, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102898841"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":4.9309,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.95746648,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"77","last_page":"89"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9071983098983765},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6640747785568237},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4552953541278839},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4312398433685303},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4302457571029663},{"id":"https://openalex.org/keywords/intrinsics","display_name":"Intrinsics","score":0.4160490036010742},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.39926162362098694},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3543951213359833}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9071983098983765},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6640747785568237},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4552953541278839},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4312398433685303},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4302457571029663},{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.4160490036010742},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.39926162362098694},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3543951213359833}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cgo51591.2021.9370330","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370330","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2101.08458","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2101.08458","pdf_url":"https://arxiv.org/pdf/2101.08458","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2101.08458","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2101.08458","pdf_url":"https://arxiv.org/pdf/2101.08458","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5299999713897705,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2283229822","display_name":null,"funder_award_id":"CCF-1751400","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1991009705","https://openalex.org/W2055312318","https://openalex.org/W2077143534","https://openalex.org/W2090268225","https://openalex.org/W2099404643","https://openalex.org/W2108598243","https://openalex.org/W2111394443","https://openalex.org/W2127324789","https://openalex.org/W2626211758","https://openalex.org/W2626953429","https://openalex.org/W2763421725","https://openalex.org/W2786320458","https://openalex.org/W2790925711","https://openalex.org/W2798341898","https://openalex.org/W2804032941","https://openalex.org/W2804500013","https://openalex.org/W2805566098","https://openalex.org/W2889647155","https://openalex.org/W2899644485","https://openalex.org/W2914500262","https://openalex.org/W2953184160","https://openalex.org/W2963947383","https://openalex.org/W2963960923","https://openalex.org/W2980270412","https://openalex.org/W2997238241","https://openalex.org/W2998570630","https://openalex.org/W3004731435","https://openalex.org/W3016220765","https://openalex.org/W3016769527","https://openalex.org/W3033509590","https://openalex.org/W3036648875","https://openalex.org/W3036692157","https://openalex.org/W3042501387","https://openalex.org/W3043303806","https://openalex.org/W3048263222","https://openalex.org/W3098220359","https://openalex.org/W4239072543","https://openalex.org/W4244024631","https://openalex.org/W4244894488","https://openalex.org/W4245302940","https://openalex.org/W6751109714","https://openalex.org/W6751349269","https://openalex.org/W6752057402","https://openalex.org/W6754777574","https://openalex.org/W6773852563"],"related_works":["https://openalex.org/W4313423325","https://openalex.org/W2013156670","https://openalex.org/W1998013902","https://openalex.org/W2550108858","https://openalex.org/W1781968824","https://openalex.org/W4247993032","https://openalex.org/W3145476088","https://openalex.org/W2117274229","https://openalex.org/W1963867998","https://openalex.org/W3002622661"],"abstract_inverted_index":{"Because":[0],"of":[1,51,54,98,202,209,244,251,262],"the":[2,22,52,183,200,207,210,242,245,248,252,256,263],"increasing":[3],"demand":[4],"for":[5,64,101,121,145,158,185,228,313],"intensive":[6],"computation":[7],"in":[8],"deep":[9],"neural":[10],"networks,":[11],"researchers":[12],"have":[13],"developed":[14],"both":[15],"hardware":[16,48,58,138,277],"and":[17,24,75,128,143,206,212,235,254,304],"software":[18],"mechanisms":[19],"to":[20,32,42,108,117,135,151,168,181,190,259,267,273],"reduce":[21],"compute":[23],"memory":[25],"burden.":[26],"A":[27],"widely":[28],"adopted":[29],"approach":[30,115,133,192],"is":[31,40,116,126,134,141,193,271],"use":[33,118],"mixed":[34,45],"precision":[35,46,89,94],"data":[36,55],"types.":[37],"However,":[38],"it":[39,106,166],"hard":[41,107],"benefit":[43],"from":[44,217],"without":[47],"specialization":[49],"because":[50],"overhead":[53],"casting.":[56],"Recently,":[57],"vendors":[59],"offer":[60],"tensorized":[61,170,186,233,264],"instructions":[62,79,204,216],"specialized":[63],"mixed-precision":[65],"tensor":[66,237],"operations,":[67],"such":[68],"as":[69],"Intel":[70,289],"VNNI,":[71],"Nvidia":[72,298,302],"Tensor":[73],"Core,":[74],"ARM":[76,314,318],"DOT.":[77],"These":[78],"involve":[80],"a":[81,177,194,232,236,308],"new":[82,203],"computing":[83],"idiom,":[84],"which":[85,140,198],"reduces":[86],"multiple":[87],"low":[88],"elements":[90],"into":[91],"one":[92,114],"high":[93],"element.":[95],"The":[96,188,279],"lack":[97],"compilation":[99,184],"techniques":[100],"this":[102,125,153,173,191],"emerging":[103],"idiom":[104],"makes":[105,199],"utilize":[109],"these":[110],"instructions.":[111,171,187],"In":[112,172],"practice,":[113],"vendor-provided":[119],"libraries":[120],"computationally-intensive":[122],"kernels,":[123],"but":[124],"inflexible":[127],"prevents":[129],"further":[130],"optimizations.":[131],"Another":[132],"manually":[136],"write":[137],"intrinsics,":[139],"error-prone":[142],"difficult":[144],"programmers.":[146],"Some":[147],"prior":[148],"works":[149],"tried":[150],"address":[152],"problem":[154],"by":[155],"creating":[156],"compilers":[157],"each":[159],"instruction.":[160,265],"This":[161],"requires":[162],"excessive":[163],"efforts":[164],"when":[165],"comes":[167],"many":[169],"work,":[174],"we":[175],"develop":[176],"compiler":[178],"framework,":[179],"UNIT,":[180],"unify":[182],"key":[189],"unified":[195],"semantics":[196],"abstraction":[197],"integration":[201],"easy,":[205],"reuse":[208],"analysis":[211],"transformations":[213],"possible.":[214],"Tensorized":[215],"different":[218],"platforms":[219],"can":[220],"be":[221],"compiled":[222],"via":[223],"UNIT":[224,239,270],"with":[225],"moderate":[226],"effort":[227],"favorable":[229],"performance.":[230],"Given":[231],"instruction":[234],"operation,":[238,253],"automatically":[240],"detects":[241],"applicability":[243],"instruction,":[246],"transforms":[247],"loop":[249,257],"organization":[250],"rewrites":[255],"body":[258],"take":[260],"advantage":[261],"According":[266],"our":[268],"evaluation,":[269],"able":[272],"target":[274],"various":[275],"mainstream":[276],"platforms.":[278],"generated":[280],"end-to-end":[281],"inference":[282],"model":[283],"achieves":[284],"1.3":[285],"x":[286],"speedup":[287,296,306],"over":[288,297,307],"oneDNN":[290],"on":[291,300,316],"an":[292,301,317],"x86":[293],"CPU,":[294],"1.75x":[295],"cuDNN":[299],"GPU,":[303],"1.13x":[305],"carefully":[309],"tuned":[310],"TVM":[311],"solution":[312],"DOT":[315],"CPU.":[319]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
