{"id":"https://openalex.org/W4398173942","doi":"https://doi.org/10.1109/tc.2024.3404051","title":"ToEx: Accelerating Generation Stage of Transformer-Based Language Models via Token-Adaptive Early Exit","display_name":"ToEx: Accelerating Generation Stage of Transformer-Based Language Models via Token-Adaptive Early Exit","publication_year":2024,"publication_date":"2024-05-21","ids":{"openalex":"https://openalex.org/W4398173942","doi":"https://doi.org/10.1109/tc.2024.3404051"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2024.3404051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3404051","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047372993","display_name":"Myeonggu Kang","orcid":"https://orcid.org/0000-0003-3557-8526"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Myeonggu Kang","raw_affiliation_strings":["School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-3557-8526","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106404793","display_name":"Junyoung Park","orcid":"https://orcid.org/0009-0005-7722-8702"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Junyoung Park","raw_affiliation_strings":["School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0009-0005-7722-8702","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073278616","display_name":"Hyein Shin","orcid":"https://orcid.org/0000-0003-0382-4032"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyein Shin","raw_affiliation_strings":["School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-0382-4032","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072316644","display_name":"Jaekang Shin","orcid":"https://orcid.org/0000-0002-5943-1599"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaekang Shin","raw_affiliation_strings":["School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-5943-1599","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052390471","display_name":"Lee\u2010Sup Kim","orcid":"https://orcid.org/0000-0001-9585-4591"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Lee-Sup Kim","raw_affiliation_strings":["School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"raw_orcid":"https://orcid.org/0000-0001-9585-4591","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5047372993"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":null,"apc_paid":null,"fwci":0.3311,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62326821,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"73","issue":"9","first_page":"2248","last_page":"2261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8932126760482788},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6647129058837891},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.655771017074585},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5958049297332764},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5256422758102417},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.5204498767852783},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.44669318199157715},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.41649138927459717},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3361465334892273},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.32548850774765015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.184393048286438},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.15379053354263306},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.11351504921913147}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8932126760482788},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6647129058837891},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.655771017074585},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5958049297332764},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5256422758102417},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.5204498767852783},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.44669318199157715},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41649138927459717},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3361465334892273},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.32548850774765015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.184393048286438},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.15379053354263306},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11351504921913147},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2024.3404051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3404051","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3807300858","display_name":null,"funder_award_id":"IITP-2020-0-01847","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W2197084977","https://openalex.org/W2300242332","https://openalex.org/W2512924740","https://openalex.org/W2592790041","https://openalex.org/W2606974598","https://openalex.org/W2725159389","https://openalex.org/W2888482885","https://openalex.org/W2896457183","https://openalex.org/W2906152891","https://openalex.org/W2949615363","https://openalex.org/W2962677625","https://openalex.org/W2979826702","https://openalex.org/W3006586535","https://openalex.org/W3019166713","https://openalex.org/W3034999214","https://openalex.org/W3035038672","https://openalex.org/W3100985894","https://openalex.org/W3101163004","https://openalex.org/W3133652505","https://openalex.org/W3159727696","https://openalex.org/W3207622241","https://openalex.org/W4206634569","https://openalex.org/W4283313765","https://openalex.org/W4285595056","https://openalex.org/W4288089799","https://openalex.org/W4378446728","https://openalex.org/W4379260375","https://openalex.org/W4380714727","https://openalex.org/W4385245566","https://openalex.org/W4385571154","https://openalex.org/W4387321091","https://openalex.org/W6632455782","https://openalex.org/W6682631176","https://openalex.org/W6734465243","https://openalex.org/W6755207826","https://openalex.org/W6766978945","https://openalex.org/W6769627184","https://openalex.org/W6779313456","https://openalex.org/W6794101021","https://openalex.org/W6839092155","https://openalex.org/W6840061620","https://openalex.org/W6852686595","https://openalex.org/W6853048723","https://openalex.org/W6853804809","https://openalex.org/W6855656803","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2366403280","https://openalex.org/W2164382479","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W3148229873","https://openalex.org/W2013643406"],"abstract_inverted_index":{"Transformer-based":[0],"language":[1,10],"models":[2,24],"have":[3],"recently":[4],"gained":[5],"popularity":[6],"in":[7],"numerous":[8],"natural":[9],"processing":[11],"(NLP)":[12],"applications":[13],"due":[14,45],"to":[15,20,46,61,69,105,165,195],"their":[16],"superior":[17],"performance":[18],"compared":[19,194],"traditional":[21],"algorithms.":[22],"These":[23],"involve":[25],"two":[26,111],"execution":[27,43,197],"stages:":[28],"summarization":[29],"and":[30,53,117],"generation.":[31],"The":[32],"generation":[33,67],"stage":[34,68],"accounts":[35],"for":[36,95,121,140],"a":[37,79,130,154,162],"significant":[38,119],"portion":[39],"of":[40,149,181],"the":[41,66,75,103,147,168,179],"total":[42],"time":[44],"its":[47],"auto-regressive":[48],"property,":[49],"which":[50],"necessitates":[51],"considerable":[52],"repetitive":[54],"off-chip":[55,63,93],"accesses.":[56],"Consequently,":[57],"our":[58,100,175,199],"objective":[59],"is":[60],"minimize":[62,106],"accesses":[64,94],"during":[65],"expedite":[70],"transformer":[71,196],"execution.":[72],"To":[73,124],"achieve":[74],"goal,":[76],"we":[77,128,145],"propose":[78],"token-adaptive":[80],"early":[81],"exit":[82,122,150],"(ToEx)":[83],"that":[84,132,174],"generates":[85],"output":[86,156],"tokens":[87],"using":[88],"fewer":[89],"decoders,":[90],"thereby":[91],"reducing":[92],"loading":[96],"weight":[97],"parameters.":[98],"Although":[99],"approach":[101],"has":[102],"potential":[104],"data":[107],"communication,":[108],"it":[109,188],"brings":[110],"challenges:":[112],"1)":[113],"inaccurate":[114],"self-attention":[115,135],"computation,":[116],"2)":[118],"overhead":[120,148],"decision.":[123],"overcome":[125],"these":[126],"challenges,":[127],"introduce":[129],"methodology":[131],"facilitates":[133],"accurate":[134],"by":[136,152,183],"lazily":[137],"performing":[138],"computations":[139],"previously":[141],"exited":[142],"tokens.":[143],"Moreover,":[144],"mitigate":[146],"decision":[151],"incorporating":[153],"lightweight":[155],"embedding":[157],"layer.":[158],"We":[159],"also":[160],"present":[161],"hardware":[163],"design":[164],"efficiently":[166],"support":[167],"proposed":[169],"work.":[170,200],"Evaluation":[171],"results":[172],"demonstrate":[173],"work":[176],"can":[177],"reduce":[178],"number":[180],"decoders":[182],"2.6\u00d7":[184],"on":[185,192],"average.":[186],"Accordingly,":[187],"achieves":[189],"3.2\u00d7":[190],"speedup":[191],"average":[193],"without":[198]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
