{"id":"https://openalex.org/W4360831803","doi":"https://doi.org/10.1109/hpca56546.2023.10070997","title":"CTA: Hardware-Software Co-design for Compressed Token Attention Mechanism","display_name":"CTA: Hardware-Software Co-design for Compressed Token Attention Mechanism","publication_year":2023,"publication_date":"2023-02-01","ids":{"openalex":"https://openalex.org/W4360831803","doi":"https://doi.org/10.1109/hpca56546.2023.10070997"},"language":"en","primary_location":{"id":"doi:10.1109/hpca56546.2023.10070997","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070997","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100324824","display_name":"Haoran Wang","orcid":"https://orcid.org/0000-0002-4622-0119"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoran Wang","raw_affiliation_strings":["Chinese Academy of Sciences,CICS, Institute of Computing Technology","CICS, Institute of Computing Technology, Chinese Academy of Sciences","University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,CICS, Institute of Computing Technology","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"CICS, Institute of Computing Technology, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044810210","display_name":"Haobo Xu","orcid":"https://orcid.org/0000-0002-0243-6516"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobo Xu","raw_affiliation_strings":["Chinese Academy of Sciences,CICS, Institute of Computing Technology","CICS, Institute of Computing Technology, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,CICS, Institute of Computing Technology","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"CICS, Institute of Computing Technology, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100346965","display_name":"Ying Wang","orcid":"https://orcid.org/0000-0001-5172-4736"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wang","raw_affiliation_strings":["Chinese Academy of Sciences,CICS, Institute of Computing Technology","University of Chinese Academy of Sciences","Zhejiang Laboratory","CICS, Institute of Computing Technology, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,CICS, Institute of Computing Technology","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Zhejiang Laboratory","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"CICS, Institute of Computing Technology, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016864694","display_name":"Yinhe Han","orcid":"https://orcid.org/0000-0003-0904-6681"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinhe Han","raw_affiliation_strings":["Chinese Academy of Sciences,CICS, Institute of Computing Technology","Zhejiang Laboratory","CICS, Institute of Computing Technology, Chinese Academy of Sciences","University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,CICS, Institute of Computing Technology","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Zhejiang Laboratory","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"CICS, Institute of Computing Technology, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100324824"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":1.8339,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.87266955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"429","last_page":"441"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8950936794281006},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8518180251121521},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6737734079360962},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6101066470146179},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6007120609283447},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5915914177894592},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.42516207695007324},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4034254550933838},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3479622006416321},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.12709081172943115},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11951282620429993}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8950936794281006},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518180251121521},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6737734079360962},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6101066470146179},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6007120609283447},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5915914177894592},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.42516207695007324},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4034254550933838},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3479622006416321},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.12709081172943115},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11951282620429993},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca56546.2023.10070997","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070997","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8799999952316284}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":80,"referenced_works":["https://openalex.org/W192724328","https://openalex.org/W288635123","https://openalex.org/W1502916507","https://openalex.org/W1977182282","https://openalex.org/W2048266589","https://openalex.org/W2048779798","https://openalex.org/W2053229256","https://openalex.org/W2109034006","https://openalex.org/W2111006384","https://openalex.org/W2113459411","https://openalex.org/W2117658559","https://openalex.org/W2119144962","https://openalex.org/W2124509324","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2155976638","https://openalex.org/W2157331557","https://openalex.org/W2162006472","https://openalex.org/W2285660444","https://openalex.org/W2442974303","https://openalex.org/W2464177207","https://openalex.org/W2518281301","https://openalex.org/W2541839172","https://openalex.org/W2613989746","https://openalex.org/W2732156181","https://openalex.org/W2804078698","https://openalex.org/W2896457183","https://openalex.org/W2898085636","https://openalex.org/W2945146780","https://openalex.org/W2946417913","https://openalex.org/W2962745591","https://openalex.org/W2963351113","https://openalex.org/W2963367478","https://openalex.org/W2963748441","https://openalex.org/W2963981376","https://openalex.org/W2965373594","https://openalex.org/W2968124245","https://openalex.org/W2979826702","https://openalex.org/W2980200167","https://openalex.org/W2981413347","https://openalex.org/W2998655561","https://openalex.org/W3016542674","https://openalex.org/W3017024317","https://openalex.org/W3034445277","https://openalex.org/W3034655362","https://openalex.org/W3047171714","https://openalex.org/W3096591391","https://openalex.org/W3101704389","https://openalex.org/W3131920484","https://openalex.org/W3170874841","https://openalex.org/W3189877953","https://openalex.org/W4224267386","https://openalex.org/W4240168186","https://openalex.org/W4245199738","https://openalex.org/W4287824654","https://openalex.org/W4292779060","https://openalex.org/W4295312788","https://openalex.org/W4298395628","https://openalex.org/W4385245566","https://openalex.org/W6607776381","https://openalex.org/W6610514318","https://openalex.org/W6629956336","https://openalex.org/W6676984168","https://openalex.org/W6677580257","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6719768283","https://openalex.org/W6729126992","https://openalex.org/W6752378368","https://openalex.org/W6755207826","https://openalex.org/W6763367864","https://openalex.org/W6766673545","https://openalex.org/W6766978945","https://openalex.org/W6767279747","https://openalex.org/W6771917389","https://openalex.org/W6778883912","https://openalex.org/W6779879114","https://openalex.org/W6781275321","https://openalex.org/W6788135285","https://openalex.org/W6843845902"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2027972911","https://openalex.org/W2146343568","https://openalex.org/W2013643406","https://openalex.org/W2891987081","https://openalex.org/W2097707447","https://openalex.org/W1966837078"],"abstract_inverted_index":{"The":[0],"attention":[1,30,60,118,137,155,179,197],"mechanism":[2,31,61,156,198],"is":[3,62,107],"becoming":[4],"an":[5,126],"integral":[6],"part":[7],"of":[8,29,69,100,136,161],"modern":[9],"neural":[10],"networks,":[11],"bringing":[12],"breakthroughs":[13],"to":[14,36,64,67,76,204],"Natural":[15],"Language":[16],"Processing":[17],"(NLP)":[18],"applications":[19],"and":[20,88,112,142,193,228],"even":[21],"Computer":[22],"Vision":[23],"(CV)":[24],"applications.":[25],"Unfortunately,":[26],"the":[27,70,114,147,150,207],"superiority":[28],"comes":[32],"from":[33,97],"its":[34],"ability":[35],"model":[37],"relations":[38,84,101],"between":[39],"any":[40],"two":[41],"positions":[42],"in":[43,117],"long":[44],"sequence,":[45],"which":[46,106],"incurs":[47],"high":[48,195],"inference":[49,71],"overhead.":[50,72],"For":[51],"state-of-the-art":[52],"AI":[53],"workloads":[54],"such":[55],"as":[56],"Bert":[57],"or":[58],"GPT-2,":[59],"reported":[63],"account":[65],"up":[66],"50%":[68],"Previous":[73],"works":[74],"seek":[75],"alleviate":[77],"this":[78,121,190],"performance":[79,196],"bottleneck":[80],"by":[81,146,154],"removing":[82],"useless":[83],"for":[85,103],"each":[86,104],"position":[87],"accelerate":[89],"position-specific":[90],"operations.":[91],"However":[92],"their":[93],"attempts":[94],"require":[95],"selecting":[96],"a":[98,158,167],"sequence":[99,152],"once":[102],"position,":[105],"essentially":[108],"frequent":[109],"on-the-fly":[110],"pruning":[111],"breaks":[113],"inherent":[115],"parallelism":[116],"mechanism.":[119],"In":[120],"paper,":[122],"we":[123,165,200],"propose":[124,166],"CTA,":[125],"algorithm-architecture":[127],"co-designed":[128],"solution":[129],"that":[130,149,171,175],"can":[131,172],"substantially":[132],"reduce":[133],"theoretic":[134],"complexity":[135,186],"mechanism,":[138],"enabling":[139],"significant":[140],"speedup":[141],"energy":[143,222,232,249],"saving.":[144],"Inspired":[145],"fact":[148],"feature":[151,163],"encoded":[153],"contain":[157],"large":[159],"number":[160],"semantic":[162],"repetition,":[164,176],"novel":[168],"approximation":[169,209],"scheme":[170],"efficiently":[173,205],"remove":[174],"only":[177],"calculating":[178],"among":[180],"necessary":[181],"features":[182],"thus":[183],"reducing":[184],"computation":[185],"quadratically.":[187],"To":[188],"utilize":[189],"algorithmic":[191],"bonus":[192],"empower":[194],"inference,":[199],"devise":[201],"specialized":[202],"architecture":[203],"support":[206],"proposed":[208],"scheme.":[210],"Extensive":[211],"experiments":[212],"show":[213],"that,":[214],"on":[215],"average,":[216],"CTA":[217,244],"achieves":[218,245],"27.7\u00d7":[219],"speedup,":[220,230,247],"634.0\u00d7":[221],"savings":[223,233,250],"with":[224,234],"no":[225],"accuracy":[226,237],"loss,":[227],"44.2\u00d7":[229],"950.0\u00d7":[231],"around":[235],"1%":[236],"loss":[238],"over":[239,251],"Nvidia":[240],"V100-SXM2":[241],"GPU.":[242],"Also,":[243],"22.8\u00d7":[246],"479.6\u00d7":[248],"ELSA":[252],"accelerator+GPU":[253],"system.":[254]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
