{"id":"https://openalex.org/W4388283449","doi":"https://doi.org/10.1109/tvlsi.2023.3326741","title":"TensorCache: Reconstructing Memory Architecture With SRAM-Based In-Cache Computing for Efficient Tensor Computations in GPGPUs","display_name":"TensorCache: Reconstructing Memory Architecture With SRAM-Based In-Cache Computing for Efficient Tensor Computations in GPGPUs","publication_year":2023,"publication_date":"2023-11-03","ids":{"openalex":"https://openalex.org/W4388283449","doi":"https://doi.org/10.1109/tvlsi.2023.3326741"},"language":"en","primary_location":{"id":"doi:10.1109/tvlsi.2023.3326741","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2023.3326741","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077508723","display_name":"Yicong Zhang","orcid":"https://orcid.org/0009-0000-4579-288X"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yicong Zhang","raw_affiliation_strings":["School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100350118","display_name":"Mingyu Wang","orcid":"https://orcid.org/0000-0003-4006-8870"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu Wang","raw_affiliation_strings":["School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026837092","display_name":"Yangzhan Mai","orcid":"https://orcid.org/0009-0008-3895-4340"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangzhan Mai","raw_affiliation_strings":["School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017560036","display_name":"Zhiyi Yu","orcid":"https://orcid.org/0000-0002-8802-0457"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyi Yu","raw_affiliation_strings":["School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics Science and Technology and the Guangdong Provincial Key Laboratory of Optoelectronic Information Processing Chips and Systems, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5077508723"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":2.42,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88936627,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"31","issue":"12","first_page":"2030","last_page":"2043"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8390913009643555},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6763926148414612},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5130351781845093},{"id":"https://openalex.org/keywords/in-memory-processing","display_name":"In-Memory Processing","score":0.4823160171508789},{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.4434267282485962},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.42980796098709106},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4175899028778076},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.38449227809906006},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.34502145648002625},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.1249469518661499}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8390913009643555},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6763926148414612},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5130351781845093},{"id":"https://openalex.org/C123593499","wikidata":"https://www.wikidata.org/wiki/Q6008583","display_name":"In-Memory Processing","level":5,"score":0.4823160171508789},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.4434267282485962},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.42980796098709106},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4175899028778076},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38449227809906006},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.34502145648002625},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.1249469518661499},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.0},{"id":"https://openalex.org/C194222762","wikidata":"https://www.wikidata.org/wiki/Q114486","display_name":"Query by Example","level":4,"score":0.0},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2023.3326741","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2023.3326741","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G3040571651","display_name":null,"funder_award_id":"62204271","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8357081843","display_name":null,"funder_award_id":"2022A1515011708","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1895577753","https://openalex.org/W1975237352","https://openalex.org/W1979527452","https://openalex.org/W2111366547","https://openalex.org/W2117489143","https://openalex.org/W2119615570","https://openalex.org/W2132292283","https://openalex.org/W2134101883","https://openalex.org/W2142444503","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2613569094","https://openalex.org/W2801000640","https://openalex.org/W2803405094","https://openalex.org/W2897830517","https://openalex.org/W2901073342","https://openalex.org/W2920242713","https://openalex.org/W2963989532","https://openalex.org/W2967324946","https://openalex.org/W2980186997","https://openalex.org/W3005997294","https://openalex.org/W3048581975","https://openalex.org/W3083249310","https://openalex.org/W3091804697","https://openalex.org/W3096221800","https://openalex.org/W3102510044","https://openalex.org/W3133253223","https://openalex.org/W3206857466","https://openalex.org/W4221101426","https://openalex.org/W4233147525","https://openalex.org/W4294371482","https://openalex.org/W4300125772","https://openalex.org/W4308090212","https://openalex.org/W4309672181","https://openalex.org/W4312253643","https://openalex.org/W4313332258","https://openalex.org/W4313839325","https://openalex.org/W4367146866","https://openalex.org/W4376607860","https://openalex.org/W6637373629","https://openalex.org/W6638783484","https://openalex.org/W6638839971","https://openalex.org/W6755919064","https://openalex.org/W6756424931","https://openalex.org/W6811407144"],"related_works":["https://openalex.org/W2186949690","https://openalex.org/W2167303720","https://openalex.org/W2154109900","https://openalex.org/W2497617944","https://openalex.org/W3019064768","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966","https://openalex.org/W3019683061","https://openalex.org/W3147501184"],"abstract_inverted_index":{"General":[0],"purpose":[1],"graphics":[2],"processing":[3],"units":[4,58],"(GPGPUs)":[5],"have":[6],"emerged":[7],"as":[8],"a":[9,45,129,163,256],"convincing":[10],"and":[11,40,56,114,146,180,200,230],"pivotal":[12],"computing":[13],"platform":[14],"for":[15,24,85,127,152],"deep":[16],"learning":[17],"applications.":[18],"However,":[19],"the":[20,33,53,62,102,111,138,160,167,175],"fundamental":[21],"tensor":[22,87,157,250],"computations":[23,88,251],"neural":[25,49],"networks":[26],"on":[27],"GPGPUs":[28,60],"are":[29],"still":[30],"restricted":[31],"by":[32,100,119,252],"von":[34],"Neumann":[35],"bottleneck.":[36],"The":[37],"memory":[38,54,76,81],"bandwidth":[39],"energy":[41,115],"consumption":[42,116,248],"of":[43,48,59,220],"moving":[44],"large":[46],"amount":[47],"network":[50],"data":[51,120],"between":[52,177],"hierarchy":[55],"computational":[57,64],"dominate":[61],"overall":[63],"cost.":[65],"To":[66,122],"address":[67],"these":[68],"challenges,":[69],"this":[70],"article":[71],"proposes":[72],"TensorCache":[73,188],"to":[74,173,191,211,222,241],"reconstruct":[75],"architecture":[77],"with":[78,255],"static":[79],"random-access":[80],"(SRAM)-based":[82],"In-Cache":[83],"Computing":[84],"efficient":[86,124,153],"in":[89,249],"GPGPUs.":[90],"It":[91],"provides":[92],"an":[93,218],"innovative":[94],"digital":[95],"SRAM":[96],"processing-in-memory":[97],"(PIM)":[98],"solution":[99],"transforming":[101],"cache":[103],"array":[104],"into":[105],"large-scale":[106],"PIM":[107],"units,":[108],"effectively":[109],"mitigating":[110],"significant":[112],"performance":[113],"losses":[117],"caused":[118],"movement.":[121],"enable":[123],"hardware-software":[125],"co-design":[126],"TensorCache,":[128],"decoupled":[130],"architecture-based":[131],"SRAM-PIM":[132],"macro":[133],"(SPM)":[134],"is":[135,171],"introduced":[136],"at":[137],"hardware":[139],"level,":[140,162],"supporting":[141],"in-memory":[142],"bit-parallel":[143],"comparison":[144],"(IMBC)":[145],"near-memory":[147],"radix-4":[148],"booth":[149],"encoder":[150],"(NRBE)":[151],"mixed-precision":[154],"floating-point":[155],"(FP)":[156],"computations.":[158],"At":[159],"software":[161],"programming":[164],"model":[165],"leveraging":[166],"GPGPU\u2019s":[168],"flexible":[169],"programmability":[170],"proposed":[172],"bridge":[174],"gap":[176],"application":[178],"demands":[179],"mismatched":[181],"hardware/software":[182],"interfaces.":[183],"Experimental":[184],"evaluations":[185],"demonstrate":[186],"that":[187],"achieves":[189],"up":[190,221],"<inline-formula":[192,201,223,231],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[193,202,224,232],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[194,203,225,233],"<tex-math":[195,204,226,234],"notation=\"LaTeX\">$38.59\\times":[196],"$":[197,206,228,236],"</tex-math></inline-formula>":[198,207,229,237],"speedup":[199],"notation=\"LaTeX\">$16.26\\times":[205],"throughput":[208,238],"enhancement":[209],"compared":[210,240],"GPU":[212,242],"CUDA":[213],"Cores.":[214],"Furthermore,":[215],"it":[216],"attains":[217],"acceleration":[219],"notation=\"LaTeX\">$1.78\\times":[227],"notation=\"LaTeX\">$3.87\\times":[235],"improvement":[239],"Tensor":[243],"Cores,":[244],"while":[245],"saving":[246],"power":[247],"over":[253],"90%":[254],"mere":[257],"21%":[258],"chip":[259],"area":[260],"overhead.":[261]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
