{"id":"https://openalex.org/W3180037928","doi":"https://doi.org/10.1145/3534678.3539260","title":"Learned Token Pruning for Transformers","display_name":"Learned Token Pruning for Transformers","publication_year":2022,"publication_date":"2022-08-12","ids":{"openalex":"https://openalex.org/W3180037928","doi":"https://doi.org/10.1145/3534678.3539260","mag":"3180037928"},"language":"en","primary_location":{"id":"doi:10.1145/3534678.3539260","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539260","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539260","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539260","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002255210","display_name":"Sehoon Kim","orcid":"https://orcid.org/0000-0002-9339-5480"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sehoon Kim","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100784815","display_name":"Sheng Shen","orcid":"https://orcid.org/0000-0002-8773-6365"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng Shen","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065376296","display_name":"David Thorsley","orcid":"https://orcid.org/0000-0001-8625-5095"},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Thorsley","raw_affiliation_strings":["Samsung Semiconductor, Inc., San Jose, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor, Inc., San Jose, CA, USA","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103894843","display_name":"Amir Gholami","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amir Gholami","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066299337","display_name":"Woosuk Kwon","orcid":"https://orcid.org/0009-0008-8870-4892"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Woosuk Kwon","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012298536","display_name":"Joseph Hassoun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joseph Hassoun","raw_affiliation_strings":["Samsung Semiconductor, Inc., Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor, Inc., Berkeley, CA, USA","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047285420","display_name":"Kurt Keutzer","orcid":"https://orcid.org/0000-0003-3868-8501"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kurt Keutzer","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5002255210"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":18.5697,"has_fulltext":true,"cited_by_count":93,"citation_normalized_percentile":{"value":0.99799197,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"784","last_page":"794"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7838432788848877},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7771205306053162},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.5668375492095947},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5226352214813232},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4299900531768799},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4172274172306061},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.41084861755371094},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35800230503082275},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13603147864341736},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08521613478660583},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07920053601264954}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7838432788848877},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7771205306053162},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.5668375492095947},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5226352214813232},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4299900531768799},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4172274172306061},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.41084861755371094},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35800230503082275},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13603147864341736},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08521613478660583},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07920053601264954},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3534678.3539260","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539260","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539260","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3534678.3539260","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539260","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539260","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8799999952316284,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3180037928.pdf","grobid_xml":"https://content.openalex.org/works/W3180037928.grobid-xml"},"referenced_works_count":66,"referenced_works":["https://openalex.org/W131533222","https://openalex.org/W1516184288","https://openalex.org/W1821462560","https://openalex.org/W2064675550","https://openalex.org/W2130158090","https://openalex.org/W2242818861","https://openalex.org/W2251939518","https://openalex.org/W2427527485","https://openalex.org/W2607892599","https://openalex.org/W2618305643","https://openalex.org/W2754526845","https://openalex.org/W2767785892","https://openalex.org/W2805003733","https://openalex.org/W2894740066","https://openalex.org/W2915106038","https://openalex.org/W2916954108","https://openalex.org/W2924902521","https://openalex.org/W2940744433","https://openalex.org/W2945767825","https://openalex.org/W2947946877","https://openalex.org/W2948130861","https://openalex.org/W2949941638","https://openalex.org/W2963145730","https://openalex.org/W2963310665","https://openalex.org/W2963341956","https://openalex.org/W2963382930","https://openalex.org/W2963403868","https://openalex.org/W2964266063","https://openalex.org/W2965373594","https://openalex.org/W2969515962","https://openalex.org/W2975059944","https://openalex.org/W2975381464","https://openalex.org/W2978017171","https://openalex.org/W2979314664","https://openalex.org/W2998183051","https://openalex.org/W3000514857","https://openalex.org/W3005692288","https://openalex.org/W3015233032","https://openalex.org/W3015298864","https://openalex.org/W3017022649","https://openalex.org/W3020268419","https://openalex.org/W3022969335","https://openalex.org/W3024171804","https://openalex.org/W3033529678","https://openalex.org/W3034573343","https://openalex.org/W3034609440","https://openalex.org/W3034742519","https://openalex.org/W3036463250","https://openalex.org/W3045733172","https://openalex.org/W3098576111","https://openalex.org/W3100985894","https://openalex.org/W3102129360","https://openalex.org/W3102403234","https://openalex.org/W3104033643","https://openalex.org/W3104216863","https://openalex.org/W3104263050","https://openalex.org/W3105966348","https://openalex.org/W3111747337","https://openalex.org/W3114304470","https://openalex.org/W3125953778","https://openalex.org/W3131922516","https://openalex.org/W3137147200","https://openalex.org/W3171750540","https://openalex.org/W3174657338","https://openalex.org/W3174708387","https://openalex.org/W3196318247"],"related_works":["https://openalex.org/W4382323155","https://openalex.org/W4315697128","https://openalex.org/W3205506801","https://openalex.org/W2971502891","https://openalex.org/W3183570023","https://openalex.org/W4287067436","https://openalex.org/W4388335561","https://openalex.org/W4389115113","https://openalex.org/W3100944160","https://openalex.org/W3016124757"],"abstract_inverted_index":{"Efficient":[0],"deployment":[1],"of":[2,83,110,139],"transformer":[3,54],"models":[4],"in":[5,158,187,196],"practice":[6],"is":[7,70,179],"challenging":[8],"due":[9],"to":[10,87,131,146,160,185],"their":[11],"inference":[12],"cost":[13],"including":[14],"memory":[15],"footprint,":[16],"latency,":[17],"and":[18,95,114,117,162,170,198],"power":[19],"consumption,":[20],"which":[21,43,156],"scales":[22],"quadratically":[23],"with":[24,61,135,150],"input":[25,50,93,188],"sequence":[26,51,86,189],"length.":[27],"To":[28],"address":[29],"this,":[30],"we":[31,175],"present":[32],"a":[33,66],"novel":[34],"token":[35,103,126],"reduction":[36,149],"method":[37,79,121],"dubbed":[38],"Learned":[39],"Token":[40],"Pruning":[41],"(LTP)":[42],"adaptively":[44,89],"removes":[45],"unimportant":[46],"tokens":[47,60],"as":[48,101],"an":[49,62],"passes":[52],"through":[53],"layers.":[55],"In":[56,141],"particular,":[57,142],"LTP":[58,111,143,178],"prunes":[59],"attention":[63],"score":[64],"below":[65],"threshold,":[67],"whose":[68],"value":[69],"learned":[71],"for":[72],"each":[73],"layer":[74],"during":[75],"training.":[76],"Our":[77,191],"threshold-based":[78],"allows":[80],"the":[81,84,92,108,123,136],"length":[82],"pruned":[85],"vary":[88],"based":[90],"on":[91,112,166],"sequence,":[94],"avoids":[96],"algorithmically":[97],"expensive":[98],"operations":[99],"such":[100],"top-k":[102],"selection.":[104],"We":[105],"extensively":[106],"test":[107],"performance":[109],"GLUE":[113],"SQuAD":[115],"tasks":[116],"show":[118],"that":[119,177],"our":[120],"outperforms":[122],"prior":[124,183],"state-of-the-art":[125],"pruning":[127],"methods":[128,184],"by":[129],"up":[130,145,159],"\u223d2.5%":[132],"higher":[133],"accuracy":[134,154],"same":[137],"amount":[138],"FLOPs.":[140],"achieves":[144],"2.1\u00d7":[147],"FLOPs":[148],"less":[151],"than":[152,182],"1%":[153],"drop,":[155],"results":[157],"1.9\u00d7":[161],"2.0\u00d7":[163],"throughput":[164],"improvement":[165],"Intel":[167],"Haswell":[168],"CPUs":[169],"NVIDIA":[171],"V100":[172],"GPUs.":[173],"Furthermore,":[174],"demonstrate":[176],"more":[180],"robust":[181],"variations":[186],"lengths.":[190],"code":[192],"has":[193],"been":[194],"developed":[195],"PyTorch":[197],"open-sourced":[199]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":39},{"year":2024,"cited_by_count":36},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
