{"id":"https://openalex.org/W4282004837","doi":"https://doi.org/10.1145/3470496.3527411","title":"SIMD <sup>2</sup>","display_name":"SIMD <sup>2</sup>","publication_year":2022,"publication_date":"2022-05-31","ids":{"openalex":"https://openalex.org/W4282004837","doi":"https://doi.org/10.1145/3470496.3527411"},"language":"en","primary_location":{"id":"doi:10.1145/3470496.3527411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3470496.3527411","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3470496.3527411","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3470496.3527411","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101827231","display_name":"Yunan Zhang","orcid":"https://orcid.org/0009-0006-4567-2849"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yunan Zhang","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080155613","display_name":"Po-An Tsai","orcid":"https://orcid.org/0000-0003-4561-6450"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Po-An Tsai","raw_affiliation_strings":["NVIDIA Research"],"affiliations":[{"raw_affiliation_string":"NVIDIA Research","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077363344","display_name":"Hung\u2010Wei Tseng","orcid":"https://orcid.org/0000-0001-8383-5203"},"institutions":[{"id":"https://openalex.org/I2803209242","display_name":"University of California System","ror":"https://ror.org/00pjdza24","country_code":"US","type":"education","lineage":["https://openalex.org/I2803209242"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hung-Wei Tseng","raw_affiliation_strings":["University of California"],"affiliations":[{"raw_affiliation_string":"University of California","institution_ids":["https://openalex.org/I2803209242"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101827231"],"corresponding_institution_ids":["https://openalex.org/I2803209242"],"apc_list":null,"apc_paid":null,"fwci":1.4005,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79177885,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"552","last_page":"566"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7370226979255676},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7233086824417114},{"id":"https://openalex.org/keywords/semiring","display_name":"Semiring","score":0.6973440647125244},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.6582533717155457},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.6394935846328735},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5532481074333191},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5446265935897827},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5388414859771729},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4904901683330536},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4694739580154419},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4000764489173889},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.370262086391449},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34327229857444763},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2141166627407074},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.19120994210243225},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17543542385101318},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.12202349305152893},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08986589312553406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7370226979255676},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7233086824417114},{"id":"https://openalex.org/C21696900","wikidata":"https://www.wikidata.org/wiki/Q1333055","display_name":"Semiring","level":2,"score":0.6973440647125244},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.6582533717155457},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.6394935846328735},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5532481074333191},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5446265935897827},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5388414859771729},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4904901683330536},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4694739580154419},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4000764489173889},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.370262086391449},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34327229857444763},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2141166627407074},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.19120994210243225},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17543542385101318},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.12202349305152893},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08986589312553406},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3470496.3527411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3470496.3527411","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3470496.3527411","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3470496.3527411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3470496.3527411","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3470496.3527411","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1460629516","display_name":"CSR: Small: IOQL: an I/O Interface for Near-Data Processing","funder_award_id":"1940048","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3268968919","display_name":null,"funder_award_id":"CNS-2007124","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5275565509","display_name":"CNS Core: Small: Re-engineering Applications for Tensor Processing Units","funder_award_id":"2007124","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7933334976","display_name":null,"funder_award_id":"19400","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4282004837.pdf","grobid_xml":"https://content.openalex.org/works/W4282004837.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W5243928","https://openalex.org/W1440609229","https://openalex.org/W1507039213","https://openalex.org/W1610607916","https://openalex.org/W1965680834","https://openalex.org/W1980153429","https://openalex.org/W1983096721","https://openalex.org/W2070513719","https://openalex.org/W2098297530","https://openalex.org/W2117489143","https://openalex.org/W2160366244","https://openalex.org/W2165829871","https://openalex.org/W2461193710","https://openalex.org/W2477957292","https://openalex.org/W2606722458","https://openalex.org/W2625457103","https://openalex.org/W2795118915","https://openalex.org/W2798724095","https://openalex.org/W2895305554","https://openalex.org/W2901549770","https://openalex.org/W2904192253","https://openalex.org/W2904902077","https://openalex.org/W2955738001","https://openalex.org/W2962903741","https://openalex.org/W2963989532","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2979858238","https://openalex.org/W2980186997","https://openalex.org/W3016107027","https://openalex.org/W3016542674","https://openalex.org/W3016735325","https://openalex.org/W3016769527","https://openalex.org/W3016832937","https://openalex.org/W3036878841","https://openalex.org/W3042416028","https://openalex.org/W3042610241","https://openalex.org/W3046685417","https://openalex.org/W3098650335","https://openalex.org/W3102510044","https://openalex.org/W3103168911","https://openalex.org/W3104528661","https://openalex.org/W3105753905","https://openalex.org/W3115527251","https://openalex.org/W3130106255","https://openalex.org/W3155922894","https://openalex.org/W3157657667","https://openalex.org/W3176818990","https://openalex.org/W3205883294","https://openalex.org/W3206916567","https://openalex.org/W3207730444","https://openalex.org/W3207791991","https://openalex.org/W4213195788","https://openalex.org/W4213383978","https://openalex.org/W4231465166","https://openalex.org/W4241140669","https://openalex.org/W4246219036","https://openalex.org/W4376272906","https://openalex.org/W6811143676","https://openalex.org/W7033251354"],"related_works":["https://openalex.org/W185248416","https://openalex.org/W4312619976","https://openalex.org/W202649520","https://openalex.org/W2381312783","https://openalex.org/W2186303141","https://openalex.org/W2371578151","https://openalex.org/W2183214273","https://openalex.org/W2351097202","https://openalex.org/W2766828645","https://openalex.org/W2047445951"],"abstract_inverted_index":{"Matrix-multiplication":[0],"units":[1],"(MXUs)":[2],"are":[3],"now":[4],"prevalent":[5],"in":[6,52],"every":[7],"computing":[8],"platform.":[9],"The":[10],"key":[11],"attribute":[12],"that":[13,43],"makes":[14],"MXUs":[15],"so":[16],"successful":[17],"is":[18,33],"the":[19,35,47,54],"semiring":[20],"structure,":[21],"which":[22],"allows":[23],"tiling":[24],"for":[25,57],"both":[26],"parallelism":[27],"and":[28,50],"data":[29],"reuse.":[30],"Nonetheless,":[31],"matrix-multiplication":[32],"not":[34],"only":[36,53],"algorithm":[37],"with":[38,65],"such":[39],"attributes.":[40],"We":[41],"find":[42],"many":[44],"algorithms":[45],"share":[46],"same":[48],"structure":[49,68],"differ":[51],"core":[55],"operation;":[56],"example,":[58],"using":[59],"add-minimum":[60],"instead":[61,81],"of":[62,82],"multiply-add.":[63],"Algorithms":[64],"a":[66,76],"semiring-like":[67],"therefore":[69],"have":[70],"potential":[71],"to":[72],"be":[73],"accelerated":[74],"by":[75],"general-purpose":[77],"matrix":[78],"operation":[79],"architecture,":[80],"common":[83],"MXUs.":[84]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
