{"id":"https://openalex.org/W4285236966","doi":"https://doi.org/10.1109/tcad.2022.3181541","title":"DTATrans: Leveraging Dynamic Token-Based Quantization With Accuracy Compensation Mechanism for Efficient Transformer Architecture","display_name":"DTATrans: Leveraging Dynamic Token-Based Quantization With Accuracy Compensation Mechanism for Efficient Transformer Architecture","publication_year":2022,"publication_date":"2022-06-08","ids":{"openalex":"https://openalex.org/W4285236966","doi":"https://doi.org/10.1109/tcad.2022.3181541"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2022.3181541","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2022.3181541","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022287798","display_name":"Tao Yang","orcid":"https://orcid.org/0000-0001-8588-9483"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Yang","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080937344","display_name":"Fei Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Ma","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018106725","display_name":"Xiaoling Li","orcid":"https://orcid.org/0000-0002-1489-117X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoling Li","raw_affiliation_strings":["Inceptio Technology Institute, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Inceptio Technology Institute, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017670541","display_name":"Fangxin Liu","orcid":"https://orcid.org/0000-0002-8769-293X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I4210122302","display_name":"ShangHai JiAi Genetics & IVF Institute","ror":"https://ror.org/02rgbry52","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210122302"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangxin Liu","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","Shanghai Qi Zhi Institute, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Shanghai Qi Zhi Institute, Shanghai, China","institution_ids":["https://openalex.org/I4210122302"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101870121","display_name":"Yilong Zhao","orcid":"https://orcid.org/0000-0002-4888-9027"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yilong Zhao","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036755436","display_name":"Zhezhi He","orcid":"https://orcid.org/0000-0002-6357-236X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhezhi He","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053801300","display_name":"Li Jiang","orcid":"https://orcid.org/0000-0002-7353-8798"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Jiang","raw_affiliation_strings":["School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5022287798"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.4452,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.90379468,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"42","issue":"2","first_page":"509","last_page":"520"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.857450544834137},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6879594922065735},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.638163685798645},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.633231520652771},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5983486175537109},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5212069749832153},{"id":"https://openalex.org/keywords/compression-ratio","display_name":"Compression ratio","score":0.44494110345840454},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.4262950122356415},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4162631034851074},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3592502176761627},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3003450632095337},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.2494783103466034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.857450544834137},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6879594922065735},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.638163685798645},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.633231520652771},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5983486175537109},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5212069749832153},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.44494110345840454},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.4262950122356415},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4162631034851074},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3592502176761627},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3003450632095337},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2494783103466034},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C511840579","wikidata":"https://www.wikidata.org/wiki/Q12757","display_name":"Internal combustion engine","level":2,"score":0.0},{"id":"https://openalex.org/C171146098","wikidata":"https://www.wikidata.org/wiki/Q124192","display_name":"Automotive engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2022.3181541","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2022.3181541","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1303213773","display_name":null,"funder_award_id":"2018YFB1403400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G744300498","display_name":null,"funder_award_id":"61834006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322999","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2289252105","https://openalex.org/W2464177207","https://openalex.org/W2604272474","https://openalex.org/W2612690371","https://openalex.org/W2923014074","https://openalex.org/W2949591530","https://openalex.org/W2953212265","https://openalex.org/W2963122961","https://openalex.org/W2963163009","https://openalex.org/W2963748441","https://openalex.org/W2965373594","https://openalex.org/W2998183051","https://openalex.org/W3017024317","https://openalex.org/W3019166713","https://openalex.org/W3035083896","https://openalex.org/W3035332806","https://openalex.org/W3043504674","https://openalex.org/W3098873988","https://openalex.org/W3108833523","https://openalex.org/W3159727696","https://openalex.org/W3177265267","https://openalex.org/W3188427387","https://openalex.org/W3199348509","https://openalex.org/W3213528054","https://openalex.org/W4211049957","https://openalex.org/W4212774754","https://openalex.org/W4226174177","https://openalex.org/W4298422451","https://openalex.org/W6678911119","https://openalex.org/W6703414193","https://openalex.org/W6719768283","https://openalex.org/W6727099177","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6762945437","https://openalex.org/W6766673545","https://openalex.org/W6778883912","https://openalex.org/W6788001715","https://openalex.org/W6797854001","https://openalex.org/W6810763290"],"related_works":["https://openalex.org/W4295943704","https://openalex.org/W3208617247","https://openalex.org/W4380681266","https://openalex.org/W3210256422","https://openalex.org/W3128751578","https://openalex.org/W3192190837","https://openalex.org/W2968430711","https://openalex.org/W4287025778","https://openalex.org/W4386999861","https://openalex.org/W3140491875"],"abstract_inverted_index":{"Models":[0],"based":[1],"on":[2,159,230],"the":[3,55,58,87,95,112,116,139,144,152,160,166,183,186,197,210,240,273],"attention":[4,188,275],"mechanism,":[5],"i.e.,":[6],"transformers,":[7],"have":[8],"shown":[9],"extraordinary":[10],"performance":[11],"in":[12,65,122,138,213,254,266],"natural":[13],"language":[14,232],"processing":[15],"(NLP)":[16],"tasks.":[17,233],"However,":[18],"their":[19],"memory":[20],"footprint,":[21],"inference":[22,32],"latency,":[23],"and":[24,98,149,202,228,258,291],"power":[25],"consumption":[26],"are":[27,136],"still":[28],"prohibitive":[29],"for":[30,111],"efficient":[31],"at":[33,37,281],"edge":[34],"devices,":[35],"even":[36],"data":[38],"centers.":[39],"To":[40],"tackle":[41],"this":[42],"issue,":[43],"we":[44,84,124],"present":[45],"an":[46,204],"algorithm-architecture":[47],"co-design":[48],"named":[49],"DTATrans.":[50],"We":[51,190,218],"find":[52,85],"empirically":[53],"that":[54,86,237],"tolerance":[56],"to":[57,63,73,105,109,165,208],"noise":[59],"varies":[60],"from":[61],"token":[62,64],"attention-based":[66,223],"NLP":[67,224],"models.":[68],"This":[69],"finding":[70],"leads":[71],"us":[72,104],"dynamically":[74,131],"quantize":[75],"different":[76],"tokens":[77,133,169],"with":[78,196,221,272],"mixed":[79],"levels":[80],"of":[81,94,146,185,256,268],"bits.":[82],"Furthermore,":[83],"overstrict":[88],"quantization":[89],"method":[90,108],"causes":[91],"a":[92,107,126],"dilemma":[93],"model":[96,99,113,153],"accuracy":[97,114,154],"compression":[100,117,127],"ratio,":[101],"which":[102],"impels":[103],"explore":[106],"compensate":[110,151],"when":[115],"ratio":[118,145],"is":[119],"high.":[120],"Thus,":[121],"DTATrans,":[123],"design":[125,192],"framework":[128],"that:":[129],"1)":[130],"quantizes":[132],"while":[134],"they":[135],"forwarded":[137],"models;":[140],"2)":[141],"jointly":[142],"determines":[143],"each":[147],"precision;":[148],"3)":[150],"by":[155,171,246],"exploiting":[156],"lightweight":[157],"computing":[158],"0-bit":[161],"tokens.":[162],"Moreover,":[163],"due":[164],"dynamic":[167],"mixed-precision":[168],"caused":[170],"our":[172,193,278],"framework,":[173],"previous":[174,241],"matrix-multiplication":[175],"accelerators":[176],"(e.g.,":[177],"systolic":[178,199],"array)":[179],"cannot":[180],"effectively":[181],"exploit":[182],"benefit":[184],"compressed":[187],"computation.":[189],"thus":[191],"transformer":[194],"accelerator":[195,244,276],"variable-speed":[198],"array":[200],"(VSSA)":[201],"propose":[203],"effective":[205],"optimization":[206],"strategy":[207],"alleviate":[209],"pipeline-stall":[211],"problem":[212],"VSSA":[214],"without":[215],"hardware":[216],"overhead.":[217],"conduct":[219],"experiments":[220],"existing":[222],"models,":[225],"including":[226],"BERT":[227],"GPT-2":[229],"various":[231],"Our":[234],"results":[235],"show":[236],"DTATrans":[238,279],"outperforms":[239],"neural":[242],"network":[243],"Eyeriss":[245],"<inline-formula":[247,259,283,292],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[248,260,284,293],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[249,261,285,294],"<tex-math":[250,262,286,295],"notation=\"LaTeX\">$16.04\\times":[251],"$":[252,264,288,297],"</tex-math></inline-formula>":[253,265,289,298],"terms":[255,267],"speedup":[257,290],"notation=\"LaTeX\">$3.62\\times":[263,287],"energy":[269,299],"saving.":[270],"Compared":[271],"state-of-the-art":[274],"SpAtten,":[277],"achieves":[280],"least":[282],"notation=\"LaTeX\">$4.22\\times":[296],"efficiency":[300],"improvement.":[301]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
