{"id":"https://openalex.org/W4411505085","doi":"https://doi.org/10.1145/3745028","title":"TAFP-ViT: A Transformer Accelerator via QKV Computational Fusion and Adaptive Pruning for Vision Transformer","display_name":"TAFP-ViT: A Transformer Accelerator via QKV Computational Fusion and Adaptive Pruning for Vision Transformer","publication_year":2025,"publication_date":"2025-06-21","ids":{"openalex":"https://openalex.org/W4411505085","doi":"https://doi.org/10.1145/3745028"},"language":"en","primary_location":{"id":"doi:10.1145/3745028","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3745028","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5117396952","display_name":"Liang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liang Xu","raw_affiliation_strings":["Sun Yat-Sen University"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039520060","display_name":"HongRui Song","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongrui Song","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015624275","display_name":"Tian Lan","orcid":"https://orcid.org/0000-0003-1321-334X"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lan Tian","raw_affiliation_strings":["Shandong University","Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Shandong University","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696999","display_name":"Zhongfeng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongfeng Wang","raw_affiliation_strings":["Sun Yat-Sen University","Shandong University"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University","institution_ids":[]},{"raw_affiliation_string":"Shandong University","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100785858","display_name":"Meiqi Wang","orcid":"https://orcid.org/0000-0001-9553-3640"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meiqi Wang","raw_affiliation_strings":["School of Integrated Circuits, Sun Yat-Sen University","Sun Yat-Sen University"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Sun Yat-Sen University","institution_ids":[]},{"raw_affiliation_string":"Sun Yat-Sen University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5117396952"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12369924,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"5","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.758538007736206},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5896720290184021},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5244929194450378},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4775708317756653},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4626738727092743},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4517962634563446},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3514236807823181},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.33831214904785156},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33808034658432007},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2279251515865326}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.758538007736206},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5896720290184021},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5244929194450378},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4775708317756653},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4626738727092743},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4517962634563446},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3514236807823181},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.33831214904785156},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33808034658432007},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2279251515865326},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3745028","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3745028","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2025754354","https://openalex.org/W2047674856","https://openalex.org/W2322411027","https://openalex.org/W2602218415","https://openalex.org/W2970106668","https://openalex.org/W2972045924","https://openalex.org/W3131922516","https://openalex.org/W3138516171","https://openalex.org/W3151130473","https://openalex.org/W3159727696","https://openalex.org/W3162542754","https://openalex.org/W3189877953","https://openalex.org/W3206453033","https://openalex.org/W4214686755","https://openalex.org/W4224272693","https://openalex.org/W4293025165","https://openalex.org/W4302198818","https://openalex.org/W4308479898","https://openalex.org/W4312849330","https://openalex.org/W4321637298","https://openalex.org/W4322760417","https://openalex.org/W4360994128","https://openalex.org/W4379116138","https://openalex.org/W4386076285","https://openalex.org/W4390874124","https://openalex.org/W4391382585","https://openalex.org/W4393973515","https://openalex.org/W4396628237"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1925544630","https://openalex.org/W2004686618"],"abstract_inverted_index":{"The":[0,116,157,175],"remarkable":[1,223],"progress":[2],"of":[3,19,225,246,256],"Vision":[4,58],"Transformer":[5],"(ViT)":[6],"models":[7,34],"has":[8,159],"significantly":[9],"advanced":[10],"performance":[11],"in":[12,21],"computer":[13],"vision":[14],"tasks.":[15],"However,":[16],"the":[17,28,61,121,138,152,207,219],"deployment":[18],"ViTs":[20],"resource-constrained":[22],"environments":[23],"remains":[24],"a":[25,36,52,66,143,199,244],"challenge,":[26,48],"as":[27],"attention":[29],"computation":[30,94,155,189],"mechanisms":[31],"within":[32],"these":[33],"form":[35],"significant":[37],"bottleneck,":[38],"requiring":[39],"substantial":[40],"memory":[41,92],"and":[42,77,83,93,107,124,129,172,177,186,212,228,236,251],"computational":[43,193],"resources.":[44],"To":[45],"address":[46],"this":[47],"we":[49],"introduce":[50],"TAFP-ViT,":[51],"tailored":[53],"hardware-software":[54],"co-design":[55],"framework":[56],"for":[57,87],"Transformers.":[59],"On":[60,137],"software":[62,118],"level,":[63,140],"TAFP-ViT":[64,97,141,181,196,221,242],"leverages":[65],"learnable":[67],"compressor":[68],"to":[69,103,112,148,151,182,202,232,248],"perform":[70],"multi-head":[71],"shared":[72],"compression":[73],"on":[74,206],"feature":[75],"maps,":[76],"fuses":[78],"decompression":[79],"reconstruction,":[80],"QKV":[81,84,153],"generation":[82],"processing":[85],"together":[86],"calculation,":[88],"thereby":[89],"greatly":[90,191],"reducing":[91],"requirements.":[95],"Furthermore,":[96],"combines":[98],"dynamic":[99,204],"inter-layer":[100],"token":[101],"pruning":[102,111,205],"eliminate":[104],"unimportant":[105],"tokens":[106],"hardware-friendly":[108],"intra-block":[109],"row":[110],"diminish":[113],"redundant":[114],"computations.":[115],"proposed":[117,220],"design":[119],"converts":[120],"calculations":[122],"before":[123],"after":[125],"SoftMax":[126],"into":[127],"dense":[128,171],"sparse":[130,173],"triple":[131],"matrix":[132,168],"multiplication":[133,169],"(TMM)":[134],"forms":[135],"respectively.":[136,240],"hardware":[139],"proposes":[142],"configurable":[144],"systolic":[145],"array":[146],"(SA)":[147],"efficiently":[149],"adapt":[150],"fusion":[154],"pattern.":[156],"SA":[158],"flexible":[160,178],"PE":[161],"units":[162],"that":[163,218],"can":[164],"effectively":[165],"support":[166,203],"general":[167],"(GEMM),":[170],"TMM.":[174],"TMM":[176],"dataflows":[179],"allow":[180],"avoid":[183],"handling":[184],"transpositions":[185],"storing":[187],"intermediate":[188],"results,":[190],"enhancing":[192],"efficiency.":[194],"Besides,":[195],"innovatively":[197],"designs":[198],"Top-k":[200],"engine":[201],"fly":[208],"with":[209],"high":[210],"throughput":[211,245],"low":[213],"resource":[214],"consumption.":[215],"Experiments":[216],"show":[217],"achieves":[222],"speedups":[224],"123.91\u00d7,":[226],"29.5\u00d7,":[227],"3.01\u223c":[229],"20.65\u00d7":[230],"compared":[231],"conventional":[233],"CPUs,":[234],"GPUs,":[235],"previous":[237],"state-of-the-art":[238],"works,":[239],"Additionally,":[241],"reaches":[243],"up":[247],"731.5":[249],"GOP/s":[250],"an":[252],"impressive":[253],"energy":[254],"efficiency":[255],"77.9":[257],"GOPS/W.":[258]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
