{"id":"https://openalex.org/W4415821063","doi":"https://doi.org/10.1109/tvlsi.2025.3624400","title":"Tensor Manipulation Unit (TMU): Reconfigurable, Near-Memory Tensor Manipulation for High-Throughput AI SoC","display_name":"Tensor Manipulation Unit (TMU): Reconfigurable, Near-Memory Tensor Manipulation for High-Throughput AI SoC","publication_year":2025,"publication_date":"2025-11-03","ids":{"openalex":"https://openalex.org/W4415821063","doi":"https://doi.org/10.1109/tvlsi.2025.3624400"},"language":null,"primary_location":{"id":"doi:10.1109/tvlsi.2025.3624400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2025.3624400","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120235584","display_name":"Weiyu Zhou","orcid":"https://orcid.org/0009-0002-0035-3589"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Weiyu Zhou","raw_affiliation_strings":["Faculty of Science and Technology, University of Macau, Macau, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, University of Macau, Macau, China","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100401156","display_name":"Zheng Wang","orcid":"https://orcid.org/0000-0003-2855-9570"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408301","display_name":"Chao Chen","orcid":"https://orcid.org/0000-0001-6488-224X"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Chen","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101537777","display_name":"Yike Li","orcid":"https://orcid.org/0009-0006-1140-1194"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yike Li","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081527767","display_name":"Yongkui Yang","orcid":"https://orcid.org/0000-0003-1159-3115"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongkui Yang","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091627905","display_name":"Zhuoyu Wu","orcid":"https://orcid.org/0000-0002-9075-7626"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuoyu Wu","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089860351","display_name":"Anupam Chattopadhyay","orcid":"https://orcid.org/0000-0002-8818-6983"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Anupam Chattopadhyay","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Jurong West, Singapore"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Jurong West, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5120235584"],"corresponding_institution_ids":["https://openalex.org/I204512498"],"apc_list":null,"apc_paid":null,"fwci":3.0714,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93362069,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"34","issue":"2","first_page":"480","last_page":"491"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.3057999908924103,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.3057999908924103,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.2535000145435333,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.040699999779462814,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.6894000172615051},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6111000180244446},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6054999828338623},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6049000024795532},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4361000061035156},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.421099990606308},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.39239999651908875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7117999792098999},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.6894000172615051},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6054999828338623},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6049000024795532},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4562999904155731},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4361000061035156},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.421099990606308},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4000000059604645},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3124000132083893},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.30469998717308044},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.25440001487731934},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2025.3624400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2025.3624400","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1844274058","https://openalex.org/W2098420952","https://openalex.org/W2606722458","https://openalex.org/W2909581446","https://openalex.org/W2963372104","https://openalex.org/W2966582152","https://openalex.org/W2972267209","https://openalex.org/W2979590285","https://openalex.org/W2998487545","https://openalex.org/W3004127905","https://openalex.org/W3018105153","https://openalex.org/W3092745262","https://openalex.org/W3106754126","https://openalex.org/W3157657667","https://openalex.org/W4234059142","https://openalex.org/W4288062521","https://openalex.org/W4360606477","https://openalex.org/W4361020109","https://openalex.org/W4385245566","https://openalex.org/W4388469806","https://openalex.org/W4393407266","https://openalex.org/W4395020696","https://openalex.org/W4398756379","https://openalex.org/W4400230978","https://openalex.org/W4406727864"],"related_works":[],"abstract_inverted_index":{"While":[0],"recent":[1],"advances":[2],"in":[3,58,185,192],"AI":[4,97],"SoC":[5],"design":[6],"have":[7],"focused":[8],"heavily":[9],"on":[10,22],"accelerating":[11],"tensor":[12,19,83,205],"computation,":[13],"the":[14,37,89,114,140,175,178,190,197,200],"equally":[15],"critical":[16],"task":[17],"of":[18,79,199],"manipulation":[20],"(TM)\u2014centered":[21],"high-volume":[23],"data":[24],"movement":[25],"with":[26,174],"minimal":[27],"computation\u2014remains":[28],"underexplored.":[29],"This":[30],"work":[31],"addresses":[32],"that":[33,139],"gap":[34],"by":[35],"introducing":[36],"TM":[38],"unit":[39],"(TMU):":[40],"a":[41,59,63,68,76,92,95,182],"reconfigurable,":[42],"near-memory":[43],"hardware":[44],"block":[45],"designed":[46],"to":[47,106,145],"execute":[48],"data-movement-intensive":[49],"(DMI)":[50],"operators":[51],"efficiently.":[52],"The":[53,85,110],"TMU":[54,90,141,201],"manipulates":[55],"long":[56],"datastreams":[57],"memory-to-memory":[60],"fashion":[61],"using":[62],"RISC-inspired":[64],"execution":[65],"model":[66],"and":[67,81,103,153,167,196],"unified":[69],"addressing":[70],"abstraction,":[71],"enabling":[72],"support":[73],"for":[74],"both":[75],"wide":[77],"range":[78],"coarse-":[80],"fine-grained":[82],"transformations.":[84],"proposed":[86],"architecture":[87,202],"integrates":[88],"alongside":[91],"TPU":[93],"within":[94],"high-throughput":[96],"system-on-chip":[98],"(SoC),":[99],"leveraging":[100],"double":[101],"buffering":[102],"output":[104],"forwarding":[105],"improve":[107],"pipeline":[108],"utilization.":[109],"TMU,":[111],"synthesized":[112],"under":[113],"SMIC":[115],"40-nm":[116],"standard":[117],"cell":[118],"library,":[119],"occupies":[120],"only":[121],"<inline-formula":[122,146,154],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[123,147,155],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[124,148,156],"<tex-math":[125,149,157],"notation=\"LaTeX\">$0.019~\\mathrm":[126],"{\\text":[127],"{mm}^{2}}$</tex-math>":[128],"</inline-formula>":[129,152,160],"while":[130],"supporting":[131],"over":[132,164],"10":[133],"representative":[134],"DMI":[135],"operators.":[136,206],"Benchmarking":[137],"shows":[138],"alone":[142],"achieves":[143,181],"up":[144],"notation=\"LaTeX\">$82.42\\times":[150],"$</tex-math>":[151,159],"notation=\"LaTeX\">$11.06\\times":[158],"operator-level":[161],"latency":[162,195],"reduction":[163,184],"ARM":[165],"A72":[166],"NVIDIA":[168],"Jetson":[169],"TX2,":[170],"respectively.":[171],"When":[172],"integrated":[173],"in-house":[176],"TPU,":[177],"complete":[179],"system":[180],"22.89%":[183],"end-to-end":[186],"inference":[187,194],"latency,":[188],"demonstrating":[189],"effectiveness":[191],"reducing":[193],"scalability":[198],"across":[203],"diverse":[204]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-03T00:00:00"}
