{"id":"https://openalex.org/W3145602566","doi":"https://doi.org/10.1145/3586074","title":"A Practical Survey on Faster and Lighter Transformers","display_name":"A Practical Survey on Faster and Lighter Transformers","publication_year":2023,"publication_date":"2023-03-04","ids":{"openalex":"https://openalex.org/W3145602566","doi":"https://doi.org/10.1145/3586074","mag":"3145602566"},"language":"en","primary_location":{"id":"doi:10.1145/3586074","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3586074","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2103.14636","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065944848","display_name":"Quentin Fournier","orcid":"https://orcid.org/0000-0002-1036-0777"},"institutions":[{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Quentin Fournier","raw_affiliation_strings":["Polytechnique Montr\u00e9al, Canada"],"raw_orcid":"https://orcid.org/0000-0002-1036-0777","affiliations":[{"raw_affiliation_string":"Polytechnique Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I45683168"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045326673","display_name":"Ga\u00e9tan Marceau Caron","orcid":"https://orcid.org/0009-0004-7590-7421"},"institutions":[{"id":"https://openalex.org/I4210164802","display_name":"Mila - Quebec Artificial Intelligence Institute","ror":"https://ror.org/05c22rx21","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210164802"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ga\u00e9tan Marceau Caron","raw_affiliation_strings":["Mila - Quebec AI Institute, Canada"],"raw_orcid":"https://orcid.org/0009-0004-7590-7421","affiliations":[{"raw_affiliation_string":"Mila - Quebec AI Institute, Canada","institution_ids":["https://openalex.org/I4210164802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038720756","display_name":"Daniel Aloise","orcid":"https://orcid.org/0000-0002-9876-2921"},"institutions":[{"id":"https://openalex.org/I45683168","display_name":"Polytechnique Montr\u00e9al","ror":"https://ror.org/05f8d4e86","country_code":"CA","type":"education","lineage":["https://openalex.org/I45683168"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Daniel Aloise","raw_affiliation_strings":["Polytechnique Montr\u00e9al, Canada"],"raw_orcid":"https://orcid.org/0000-0002-9876-2921","affiliations":[{"raw_affiliation_string":"Polytechnique Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I45683168"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":47.4485,"has_fulltext":false,"cited_by_count":119,"citation_normalized_percentile":{"value":0.9989598,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"55","issue":"14s","first_page":"1","last_page":"40"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8449128866195679},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.756747305393219},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.48885029554367065},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47271010279655457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4329315721988678},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24785494804382324}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8449128866195679},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.756747305393219},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.48885029554367065},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47271010279655457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4329315721988678},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24785494804382324},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3586074","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3586074","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2103.14636","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.14636","pdf_url":"https://arxiv.org/pdf/2103.14636","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:publications.polymtl.ca:55169","is_oa":false,"landing_page_url":"https://publications.polymtl.ca/55169/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401013","display_name":"PolyPublie (\u00c9cole Polytechnique de Montr\u00e9al)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45683168","host_organization_name":"Polytechnique Montr\u00e9al","host_organization_lineage":["https://openalex.org/I45683168"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article de revue"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2103.14636","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.14636","pdf_url":"https://arxiv.org/pdf/2103.14636","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":124,"referenced_works":["https://openalex.org/W1506514354","https://openalex.org/W1583776211","https://openalex.org/W1613249581","https://openalex.org/W1821462560","https://openalex.org/W1836465849","https://openalex.org/W2064675550","https://openalex.org/W2095705004","https://openalex.org/W2114766824","https://openalex.org/W2122948532","https://openalex.org/W2130942839","https://openalex.org/W2134797427","https://openalex.org/W2150884987","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2252136820","https://openalex.org/W2257408573","https://openalex.org/W2267186426","https://openalex.org/W2270070752","https://openalex.org/W2270190199","https://openalex.org/W2325237720","https://openalex.org/W2338908902","https://openalex.org/W2810075754","https://openalex.org/W2885311373","https://openalex.org/W2912521296","https://openalex.org/W2934842096","https://openalex.org/W2940744433","https://openalex.org/W2946567085","https://openalex.org/W2948223045","https://openalex.org/W2951025380","https://openalex.org/W2962746461","https://openalex.org/W2963112338","https://openalex.org/W2963122961","https://openalex.org/W2963341956","https://openalex.org/W2963374479","https://openalex.org/W2963399222","https://openalex.org/W2963403868","https://openalex.org/W2963494889","https://openalex.org/W2963684275","https://openalex.org/W2963748441","https://openalex.org/W2963807318","https://openalex.org/W2963809228","https://openalex.org/W2963813662","https://openalex.org/W2963925437","https://openalex.org/W2963951265","https://openalex.org/W2964081807","https://openalex.org/W2964110616","https://openalex.org/W2964212578","https://openalex.org/W2964308564","https://openalex.org/W2965373594","https://openalex.org/W2965658867","https://openalex.org/W2970557265","https://openalex.org/W2970565456","https://openalex.org/W2970726176","https://openalex.org/W2972818416","https://openalex.org/W2976023236","https://openalex.org/W2978017171","https://openalex.org/W2978670439","https://openalex.org/W2990704537","https://openalex.org/W2991040477","https://openalex.org/W2994673210","https://openalex.org/W2995983533","https://openalex.org/W2996035354","https://openalex.org/W2996428491","https://openalex.org/W2997517014","https://openalex.org/W3005714399","https://openalex.org/W3006683367","https://openalex.org/W3007328579","https://openalex.org/W3015233032","https://openalex.org/W3015468748","https://openalex.org/W3017022649","https://openalex.org/W3021293129","https://openalex.org/W3031696893","https://openalex.org/W3033529678","https://openalex.org/W3034573343","https://openalex.org/W3034609440","https://openalex.org/W3037032032","https://openalex.org/W3037660342","https://openalex.org/W3049039618","https://openalex.org/W3096609285","https://openalex.org/W3098903812","https://openalex.org/W3103754749","https://openalex.org/W3104527631","https://openalex.org/W3104613728","https://openalex.org/W3105966348","https://openalex.org/W3106298483","https://openalex.org/W3108617663","https://openalex.org/W3119786062","https://openalex.org/W3119866685","https://openalex.org/W3121592593","https://openalex.org/W3125056032","https://openalex.org/W3127433878","https://openalex.org/W3127742036","https://openalex.org/W3128976935","https://openalex.org/W3133264589","https://openalex.org/W3136363192","https://openalex.org/W3136541843","https://openalex.org/W3145511196","https://openalex.org/W3157506437","https://openalex.org/W3173365702","https://openalex.org/W3177265267","https://openalex.org/W3205284814","https://openalex.org/W4206706211","https://openalex.org/W4287324206","https://openalex.org/W4287391717","https://openalex.org/W4287667694","https://openalex.org/W4287816361","https://openalex.org/W4287827005","https://openalex.org/W4288335579","https://openalex.org/W4298436404","https://openalex.org/W4323654151","https://openalex.org/W6638523607","https://openalex.org/W6703652217","https://openalex.org/W6752342493","https://openalex.org/W6753278433","https://openalex.org/W6761628794","https://openalex.org/W6764072591","https://openalex.org/W6768501859","https://openalex.org/W6768807518","https://openalex.org/W6774082070","https://openalex.org/W6776048684","https://openalex.org/W6779163297","https://openalex.org/W6783267081","https://openalex.org/W6783522565","https://openalex.org/W6796417832"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4313488044","https://openalex.org/W3209574120","https://openalex.org/W4312192474","https://openalex.org/W4210805261"],"abstract_inverted_index":{"Recurrent":[0],"neural":[1],"networks":[2],"are":[3,11],"effective":[4],"models":[5],"to":[6,13,43,84,106,142,156,160,164,183],"process":[7],"sequences.":[8],"However,":[9,68,140],"they":[10],"unable":[12],"learn":[14],"long-term":[15],"dependencies":[16],"because":[17],"of":[18,48,75,109,146,195],"their":[19],"inherent":[20],"sequential":[21],"nature.":[22],"As":[23],"a":[24,32,76,107,192],"solution,":[25],"Vaswani":[26],"et":[27],"al.":[28],"introduced":[29],"the":[30,37,49,61,73,85,92,102,125,134,143,166,196],"Transformer,":[31],"model":[33],"solely":[34],"based":[35],"on":[36],"attention":[38],"mechanism":[39],"that":[40],"is":[41],"able":[42],"relate":[44],"any":[45],"two":[46],"positions":[47],"input":[50],"sequence,":[51],"hence":[52],"modelling":[53,66],"arbitrary":[54],"long":[55],"dependencies.":[56],"The":[57],"Transformer":[58],"has":[59,96,149],"improved":[60],"state-of-the-art":[62],"across":[63],"numerous":[64],"sequence":[65,86],"tasks.":[67],"its":[69,89],"effectiveness":[70],"comes":[71],"at":[72],"expense":[74],"quadratic":[77],"computational":[78],"and":[79,117,138,154,172,187,189,200],"memory":[80],"complexity":[81],"with":[82],"respect":[83],"length,":[87],"hindering":[88],"adoption.":[90],"Fortunately,":[91],"deep":[93],"learning":[94],"community":[95],"always":[97],"been":[98],"interested":[99],"in":[100,162],"improving":[101],"models\u2019":[103],"efficiency,":[104],"leading":[105],"plethora":[108],"solutions":[110],"such":[111,132],"as":[112,133],"parameter":[113],"sharing,":[114],"pruning,":[115],"mixed-precision,":[116],"knowledge":[118],"distillation.":[119],"Recently,":[120],"researchers":[121,153],"have":[122],"directly":[123],"addressed":[124],"Transformer\u2019s":[126],"limitation":[127],"by":[128,179,190],"designing":[129],"lower-complexity":[130],"alternatives":[131],"Longformer,":[135],"Reformer,":[136],"Linformer,":[137],"Performer.":[139],"due":[141],"wide":[144],"range":[145],"solutions,":[147],"it":[148],"become":[150],"challenging":[151],"for":[152],"practitioners":[155],"determine":[157],"which":[158],"methods":[159],"apply":[161],"practice":[163],"meet":[165],"desired":[167],"tradeoff":[168],"between":[169],"capacity,":[170],"computation,":[171],"memory.":[173],"This":[174],"survey":[175],"addresses":[176],"this":[177],"issue":[178],"investigating":[180],"popular":[181],"approaches":[182],"make":[184],"Transformers":[185],"faster":[186],"lighter":[188],"providing":[191],"comprehensive":[193],"explanation":[194],"methods\u2019":[197],"strengths,":[198],"limitations,":[199],"underlying":[201],"assumptions.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":18},{"year":2025,"cited_by_count":49},{"year":2024,"cited_by_count":34},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
