{"id":"https://openalex.org/W4390337184","doi":"https://doi.org/10.1109/icta60488.2023.10364277","title":"AttenTPU: Tensor Processor for Attention Mechanism with Fine-Grained Padding","display_name":"AttenTPU: Tensor Processor for Attention Mechanism with Fine-Grained Padding","publication_year":2023,"publication_date":"2023-10-27","ids":{"openalex":"https://openalex.org/W4390337184","doi":"https://doi.org/10.1109/icta60488.2023.10364277"},"language":"en","primary_location":{"id":"doi:10.1109/icta60488.2023.10364277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icta60488.2023.10364277","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Integrated Circuits, Technologies and Applications (ICTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001133136","display_name":"Zhihao Du","orcid":"https://orcid.org/0000-0003-3509-9322"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhihao Du","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045240151","display_name":"Yike Li","orcid":"https://orcid.org/0000-0002-6693-0935"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yike Li","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","School of Software Engineering, University of Science and Technology of China","Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"School of Software Engineering, University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408301","display_name":"Chao Chen","orcid":"https://orcid.org/0000-0001-6488-224X"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Chen","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100401045","display_name":"Zheng Wang","orcid":"https://orcid.org/0000-0001-6157-0662"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences,Shenzhen,China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"Shenzhen Institute of Advanced Technology, ChineseAcademy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001133136"],"corresponding_institution_ids":["https://openalex.org/I4210145761"],"apc_list":null,"apc_paid":null,"fwci":0.308,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54695352,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"101","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7404863834381104},{"id":"https://openalex.org/keywords/padding","display_name":"Padding","score":0.7246962189674377},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5098801851272583},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5015666484832764},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.42387300729751587},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3684734106063843},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.36737489700317383},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2563895583152771},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.1297885775566101}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7404863834381104},{"id":"https://openalex.org/C165435473","wikidata":"https://www.wikidata.org/wiki/Q1509884","display_name":"Padding","level":2,"score":0.7246962189674377},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5098801851272583},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5015666484832764},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.42387300729751587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3684734106063843},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36737489700317383},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2563895583152771},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.1297885775566101},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icta60488.2023.10364277","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icta60488.2023.10364277","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Integrated Circuits, Technologies and Applications (ICTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.5}],"awards":[{"id":"https://openalex.org/G5331173269","display_name":null,"funder_award_id":"2019B010155003","funder_id":"https://openalex.org/F4320336405","funder_display_name":"Special Project for Research and Development in Key areas of Guangdong Province"},{"id":"https://openalex.org/G5344843103","display_name":null,"funder_award_id":"2020B1515120044","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320336405","display_name":"Special Project for Research and Development in Key areas of Guangdong Province","ror":null},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2612690371","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2963122961","https://openalex.org/W4381050415","https://openalex.org/W6739901393","https://openalex.org/W6755207826"],"related_works":["https://openalex.org/W2142641794","https://openalex.org/W4384947563","https://openalex.org/W2946726629","https://openalex.org/W4231428344","https://openalex.org/W4322753435","https://openalex.org/W4243974052","https://openalex.org/W4315882065","https://openalex.org/W4244331477","https://openalex.org/W4236539272","https://openalex.org/W3152699334"],"abstract_inverted_index":{"Transformer-based":[0],"models":[1],"have":[2,39],"achieved":[3],"state-of-the":[4],"art":[5],"performance":[6],"in":[7,45,71,114],"many":[8],"Artificial":[9],"Intelligence":[10],"(AI)":[11],"tasks.":[12],"The":[13,130],"core":[14],"component":[15],"of":[16,50,81,111,146],"the":[17,20,67,72,76,79,88,109,127,137],"transformer":[18],"is":[19,124],"attention":[21,62,73],"mechanism,":[22],"which":[23],"includes":[24],"computation-intensive":[25],"operators-such":[26],"as":[27],"MatMul":[28],"and":[29,37,58,91,139,150],"Linear-raising":[30],"demand":[31],"for":[32],"hardware":[33,42,59,77,120],"support.":[34],"Many":[35],"Institute,":[36],"researchers":[38],"proposed":[40,125],"their":[41],"acceleration":[43],"methods":[44],"recent":[46],"years.":[47],"However,":[48],"most":[49],"these":[51],"works":[52],"mainly":[53],"focus":[54],"on":[55],"algorithm":[56],"optimization":[57],"performance.":[60],"Little":[61],"has":[63],"been":[64],"paid":[65],"to":[66,87,107,136],"variable":[68],"sequence":[69,82,112],"lengths":[70],"mechanism.":[74],"In":[75,95],"design,":[78],"variability":[80,110],"length":[83,113],"brings":[84],"a":[85,101,119],"challenge":[86],"load-store":[89],"unit":[90],"on-chip":[92],"data":[93],"movement.":[94],"this":[96],"work,":[97],"we":[98],"first":[99],"propose":[100],"hardware-friendly":[102],"fine-grained":[103],"padding":[104,128],"method":[105],"aiming":[106],"handle":[108],"scaled":[115],"dot-product":[116],"attention.":[117],"Then,":[118],"architecture":[121],"named":[122],"AttenTPU":[123],"using":[126],"method.":[129],"experimental":[131],"results":[132],"indicate":[133],"that":[134],"compared":[135],"CPU":[138],"GPU":[140],"platform,":[141],"our":[142],"accelerator":[143],"achieves":[144],"speed-ups":[145],"<tex":[147,151],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[148,152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$3.43\\times$</tex>":[149],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.41\\times$</tex>":[153],",":[154],"respectively.":[155]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
