{"id":"https://openalex.org/W4394998529","doi":"https://doi.org/10.1145/3620665.3640364","title":"Carat: Unlocking Value-Level Parallelism for Multiplier-Free GEMMs","display_name":"Carat: Unlocking Value-Level Parallelism for Multiplier-Free GEMMs","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998529","doi":"https://doi.org/10.1145/3620665.3640364"},"language":"en","primary_location":{"id":"doi:10.1145/3620665.3640364","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640364","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620665.3640364","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102866081","display_name":"Zhewen Pan","orcid":"https://orcid.org/0009-0009-5707-1137"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhewen Pan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034301665","display_name":"Joshua San Miguel","orcid":"https://orcid.org/0000-0002-6886-7183"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joshua San Miguel","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States of America","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037740394","display_name":"Di Wu","orcid":"https://orcid.org/0000-0001-9775-8026"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, United States of America"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, United States of America","institution_ids":["https://openalex.org/I106165777"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102866081"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":0.222,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.48072029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"167","last_page":"184"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8005856275558472},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.650059700012207},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.6037874221801758},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.542077898979187},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.4643670916557312},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.46135565638542175},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.43952956795692444},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4370410740375519},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.42201489210128784},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4192129969596863},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09871765971183777},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08765202760696411}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8005856275558472},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.650059700012207},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.6037874221801758},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.542077898979187},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.4643670916557312},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.46135565638542175},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.43952956795692444},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4370410740375519},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.42201489210128784},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4192129969596863},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09871765971183777},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08765202760696411},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620665.3640364","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640364","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620665.3640364","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640364","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7246069058","display_name":null,"funder_award_id":"CNS-2045985","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306555","display_name":"Wisconsin Alumni Research Foundation","ror":"https://ror.org/00hwxbz16"},{"id":"https://openalex.org/F4320310161","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1665214252","https://openalex.org/W1901129140","https://openalex.org/W2128830021","https://openalex.org/W2510046892","https://openalex.org/W2565600385","https://openalex.org/W2605347906","https://openalex.org/W2606722458","https://openalex.org/W2625745261","https://openalex.org/W2802559652","https://openalex.org/W2803739089","https://openalex.org/W2883899390","https://openalex.org/W2884900216","https://openalex.org/W2886848602","https://openalex.org/W2896457183","https://openalex.org/W2906685078","https://openalex.org/W2917767146","https://openalex.org/W2918210848","https://openalex.org/W2933264412","https://openalex.org/W2942720718","https://openalex.org/W2949417445","https://openalex.org/W2963594949","https://openalex.org/W2966824056","https://openalex.org/W2970821029","https://openalex.org/W2998732502","https://openalex.org/W2999905431","https://openalex.org/W3005260158","https://openalex.org/W3006926841","https://openalex.org/W3043216033","https://openalex.org/W3104481311","https://openalex.org/W3135286407","https://openalex.org/W3163725856","https://openalex.org/W3164327364","https://openalex.org/W3191540685","https://openalex.org/W3192336523","https://openalex.org/W3207265322","https://openalex.org/W4200410165","https://openalex.org/W4214674725","https://openalex.org/W4231250608","https://openalex.org/W4236269344","https://openalex.org/W4249299790","https://openalex.org/W4288741487","https://openalex.org/W4293025059","https://openalex.org/W4324297016","https://openalex.org/W4360831962","https://openalex.org/W6676297131","https://openalex.org/W6684859321","https://openalex.org/W6687483927","https://openalex.org/W6728564548","https://openalex.org/W6745718170","https://openalex.org/W6750647073","https://openalex.org/W6756319913","https://openalex.org/W6769062451","https://openalex.org/W6769481992","https://openalex.org/W6785652829","https://openalex.org/W6786271619","https://openalex.org/W6842145542"],"related_works":["https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W2950520577","https://openalex.org/W1501159154","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2494130044","https://openalex.org/W3170887803","https://openalex.org/W752783541","https://openalex.org/W74409296"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"hardware":[3],"architectures":[4],"optimized":[5],"for":[6,21,45],"general":[7],"matrix":[8],"multiplication":[9,82],"(GEMM)":[10],"have":[11],"been":[12],"well":[13],"studied":[14],"to":[15,67,118],"deliver":[16],"better":[17],"performance":[18],"and":[19,63,80,100,105,111,113],"efficiency":[20,102],"deep":[22],"neural":[23],"networks.":[24],"With":[25],"trends":[26],"towards":[27],"batched,":[28],"low-precision":[29],"data,":[30],"e.g.,":[31],"FP8":[32],"format":[33],"in":[34],"this":[35],"work,":[36],"we":[37],"observe":[38],"that":[39],"there":[40],"is":[41],"growing":[42],"untapped":[43],"potential":[44],"value":[46],"reuse.":[47],"We":[48],"propose":[49],"a":[50,108],"novel":[51],"computing":[52],"paradigm,":[53],"value-level":[54,78],"parallelism,":[55],"whereby":[56],"unique":[57],"products":[58,70],"are":[59],"computed":[60],"only":[61],"once,":[62],"different":[64],"inputs":[65],"subscribe":[66],"(select)":[68],"their":[69],"via":[71],"temporal":[72],"coding.":[73],"Our":[74],"architecture,":[75],"Carat,":[76],"employs":[77],"parallelism":[79],"transforms":[81],"into":[83],"accumulation,":[84],"performing":[85],"GEMMs":[86],"with":[87],"efficient":[88],"multiplier-free":[89],"hardware.":[90],"Experiments":[91],"show":[92],"that,":[93],"on":[94],"average,":[95],"Carat":[96],"improves":[97],"iso-area":[98],"throughput":[99],"energy":[101],"by":[103],"1.02\u00d7":[104],"1.06\u00d7":[106],"over":[107],"systolic":[109],"array":[110],"3.2\u00d7":[112],"4.3\u00d7":[114],"when":[115],"scaled":[116],"up":[117],"multiple":[119],"nodes.":[120]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
