{"id":"https://openalex.org/W4318256712","doi":"https://doi.org/10.1145/3559009.3569665","title":"Squaring the circle","display_name":"Squaring the circle","publication_year":2022,"publication_date":"2022-10-08","ids":{"openalex":"https://openalex.org/W4318256712","doi":"https://doi.org/10.1145/3559009.3569665"},"language":"en","primary_location":{"id":"doi:10.1145/3559009.3569665","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3559009.3569665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101635425","display_name":"Xin He","orcid":"https://orcid.org/0000-0001-9280-1208"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xin He","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100714448","display_name":"Kuan-Yu Chen","orcid":"https://orcid.org/0000-0002-4168-6446"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuan-Yu Chen","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044834152","display_name":"Siying Feng","orcid":"https://orcid.org/0000-0002-2685-4149"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siying Feng","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014196508","display_name":"Hun-Seok Kim","orcid":"https://orcid.org/0000-0002-6658-5502"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hun-Seok Kim","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026311377","display_name":"David Blaauw","orcid":"https://orcid.org/0000-0001-6744-7075"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Blaauw","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014250626","display_name":"Ronald Dreslinski","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ronald Dreslinski","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037541525","display_name":"Trevor Mudge","orcid":"https://orcid.org/0000-0001-7845-2187"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Mudge","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101635425"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":1.8165,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.85036293,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"148","last_page":"159"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7762241363525391},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7350887656211853},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7246047258377075},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6548546552658081},{"id":"https://openalex.org/keywords/sparse-approximation","display_name":"Sparse approximation","score":0.5588454604148865},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5257300138473511},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.5125644207000732},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.45575517416000366},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.44071725010871887},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38323864340782166},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3217046856880188},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.25877922773361206}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7762241363525391},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7350887656211853},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7246047258377075},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6548546552658081},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.5588454604148865},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5257300138473511},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.5125644207000732},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.45575517416000366},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.44071725010871887},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38323864340782166},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3217046856880188},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.25877922773361206},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3559009.3569665","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3559009.3569665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8999999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W4424585","https://openalex.org/W1783256592","https://openalex.org/W1974030687","https://openalex.org/W2035080386","https://openalex.org/W2036895660","https://openalex.org/W2051049045","https://openalex.org/W2080048189","https://openalex.org/W2085125624","https://openalex.org/W2096169320","https://openalex.org/W2098335003","https://openalex.org/W2130408605","https://openalex.org/W2172212694","https://openalex.org/W2257437519","https://openalex.org/W2462654719","https://openalex.org/W2537803004","https://openalex.org/W2606722458","https://openalex.org/W2751658790","https://openalex.org/W2781614977","https://openalex.org/W2794952988","https://openalex.org/W2799537975","https://openalex.org/W2808976322","https://openalex.org/W2900228909","https://openalex.org/W2935480346","https://openalex.org/W2945146780","https://openalex.org/W2963076818","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2979747168","https://openalex.org/W2979858238","https://openalex.org/W2980113464","https://openalex.org/W2980270412","https://openalex.org/W3005783121","https://openalex.org/W3016542674","https://openalex.org/W3016735325","https://openalex.org/W3016832937","https://openalex.org/W3016904661","https://openalex.org/W3040024858","https://openalex.org/W3091744339","https://openalex.org/W3102587717","https://openalex.org/W3103168911","https://openalex.org/W3144430200","https://openalex.org/W4240168186","https://openalex.org/W4240365877","https://openalex.org/W4245348408"],"related_works":["https://openalex.org/W2091883426","https://openalex.org/W2174948646","https://openalex.org/W3173235360","https://openalex.org/W2293771254","https://openalex.org/W2024017047","https://openalex.org/W4318256793","https://openalex.org/W4221142455","https://openalex.org/W3121828480","https://openalex.org/W2039875226","https://openalex.org/W2914631005"],"abstract_inverted_index":{"Systolic":[0],"arrays":[1],"have":[2,24],"been":[3,25],"successful":[4],"to":[5,27,58,64,84,131,216],"accelerate":[6],"dense":[7],"linear":[8],"algebra":[9],"for":[10,136],"deep":[11],"neural":[12],"networks":[13],"(DNNs),":[14],"but":[15],"cannot":[16],"handle":[17],"sparse":[18,29,37,66,78,150,161,185,210],"computations":[19],"efficiently.":[20],"Though":[21],"early":[22],"attempts":[23],"made":[26],"perform":[28],"matrix":[30,144,151],"operations":[31,68],"on":[32,177,209,220],"weight-pruned":[33],"DNNs,":[34],"handling":[35],"highly":[36],"matrices":[38,79,211],"with":[39,212],"skewed":[40],"nonzero":[41,99,172],"distribution":[42],"commonly":[43],"seen":[44],"in":[45,129],"real-world":[46,184],"graph":[47],"analytics":[48],"remains":[49],"challenging.":[50],"In":[51],"this":[52],"paper,":[53],"we":[54,71,106],"propose":[55,72],"FlexTPU":[56,176,192,226,244,258],"framework":[57],"repurpose":[59],"tensor":[60],"processing":[61,87],"units":[62],"(TPUs)":[63],"execute":[65],"matrix-vector":[67],"(SpMV).":[69],"First,":[70],"a":[73,119,159,178,201,221,224,249],"lightweight":[74],"Z-shape":[75,140],"mapping":[76,141],"of":[77,88,95,103,115,123,181,232,239],"onto":[80],"the":[81,86,96,104,124,132,139,147],"systolic":[82],"array":[83,114],"eliminate":[85],"zeros":[89],"as":[90,92],"much":[91],"possible,":[93],"regardless":[94],"sparsity":[97],"and":[98,170,183,196,223,234,236,241,265],"distribution.":[100],"On":[101],"top":[102],"mapping,":[105],"devise":[107],"an":[108,113,166,229],"SpMV":[109],"dataflow":[110],"executed":[111],"by":[112,158,261],"PEs,":[116],"which":[117],"are":[118],"slightly":[120],"modified":[121],"version":[122],"conventional":[125],"TPU":[126],"PE.":[127],"Second,":[128],"contrast":[130],"excess":[133],"preprocessing":[134],"mandatory":[135],"prior":[137],"attempts,":[138],"facilitates":[142],"on-the-fly":[143],"condensing":[145],"from":[146],"widely-used":[148],"compressed":[149],"(e.g.":[152],"CSR)":[153],"representation.":[154],"This":[155],"is":[156,245],"accomplished":[157],"proposed":[160],"data":[162],"loader":[163],"that":[164,191],"includes":[165],"on-chip":[167],"row":[168],"decoder":[169],"parallel":[171],"loaders.":[173],"We":[174],"evaluate":[175],"broad":[179],"set":[180],"synthetic":[182],"matrices.":[186],"The":[187],"experimental":[188],"result":[189],"shows":[190],"achieves":[193,228],"3.55\u00d7":[194],"speedup":[195,231,264],"3.27\u00d7":[197],"energy":[198,237,267],"saving":[199,238],"over":[200],"state-of-the-art":[202,217],"design,":[203],"Sparse-TPU.":[204],"It":[205],"performs":[206],"even":[207],"better":[208],"power-law":[213],"distributions.":[214],"Compared":[215],"library":[218],"implementations":[219],"CPU":[222],"GPU,":[225],"also":[227,246],"average":[230],"2.4\u00d7":[233],"4.3\u00d7,":[235],"130.4\u00d7":[240],"495.3\u00d7,":[242],"respectively.":[243],"evaluated":[247],"against":[248],"recent":[250],"re":[251],"configurable":[252],"(chip":[253],"multi-processor)":[254],"CMP":[255],"machine,":[256],"Transmuter.":[257],"outperforms":[259],"Transmuter":[260],"achieving":[262],"5.12\u00d7":[263],"2.65\u00d7":[266],"saving.":[268]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
