{"id":"https://openalex.org/W7140330819","doi":"https://doi.org/10.48550/arxiv.2603.23198","title":"Sparser, Faster, Lighter Transformer Language Models","display_name":"Sparser, Faster, Lighter Transformer Language Models","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140330819","doi":"https://doi.org/10.48550/arxiv.2603.23198"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23198","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130567035","display_name":"Edoardo Cetin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cetin, Edoardo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005345152","display_name":"Stefano Peluchetti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peluchetti, Stefano","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130588540","display_name":"Emilio Castillo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Castillo, Emilio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050692022","display_name":"Akira Naruse","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naruse, Akira","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130574716","display_name":"Mana Murakami","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Murakami, Mana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122198373","display_name":"Llion Jones","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jones, Llion","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5130567035"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18240000307559967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18240000307559967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1137000024318695,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08340000361204147,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6495000123977661},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.48840001225471497},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.48579999804496765},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.47540000081062317},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.44510000944137573},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4223000109195709},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.3952000141143799},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.3864000141620636},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.37860000133514404},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.37310001254081726}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.815500020980835},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6495000123977661},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.48840001225471497},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.48579999804496765},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.47540000081062317},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.44510000944137573},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.39989998936653137},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.3864000141620636},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.31709998846054077},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.31700000166893005},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.31470000743865967},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C174183944","wikidata":"https://www.wikidata.org/wiki/Q334661","display_name":"MIT License","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2599000036716461},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.889113187789917}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scaling":[0],"autoregressive":[1],"large":[2],"language":[3],"models":[4],"(LLMs)":[5],"has":[6],"driven":[7],"unprecedented":[8],"progress":[9],"but":[10],"comes":[11],"with":[12,64,103,111,132],"vast":[13],"computational":[14],"costs.":[15],"In":[16],"this":[17],"work,":[18],"we":[19,47,85,114],"tackle":[20],"these":[21,117],"costs":[22],"by":[23],"leveraging":[24],"unstructured":[25],"sparsity":[26,102,118,154],"within":[27],"an":[28,143],"LLM's":[29],"feedforward":[30],"layers,":[31],"the":[32,38,65,161],"components":[33],"accounting":[34],"for":[35,159],"most":[36],"of":[37,57,69,90,165],"model":[39,133],"parameters":[40],"and":[41,54,79,126,140,149,163],"execution":[42,67],"FLOPs.":[43],"To":[44,81],"achieve":[45],"this,":[46],"introduce":[48],"a":[49,55,87,156],"new":[50],"sparse":[51,74],"packing":[52],"format":[53],"set":[56],"CUDA":[58],"kernels":[59,141],"designed":[60],"to":[61,146],"seamlessly":[62],"integrate":[63],"optimized":[66],"pipelines":[68],"modern":[70,166],"GPUs,":[71],"enabling":[72],"efficient":[73],"computation":[75],"during":[76],"LLM":[77,91],"inference":[78],"training.":[80],"substantiate":[82],"our":[83,112],"gains,":[84],"provide":[86],"quantitative":[88],"study":[89],"sparsity,":[92],"demonstrating":[93],"that":[94,116,130],"simple":[95],"L1":[96],"regularization":[97],"can":[98],"induce":[99],"over":[100],"99%":[101],"negligible":[104],"impact":[105],"on":[106],"downstream":[107],"performance.":[108],"When":[109],"paired":[110],"kernels,":[113],"show":[115],"levels":[119],"translate":[120],"into":[121],"substantial":[122],"throughput,":[123],"energy":[124],"efficiency,":[125],"memory":[127],"usage":[128],"benefits":[129],"increase":[131],"scale.":[134],"We":[135],"will":[136],"release":[137],"all":[138],"code":[139],"under":[142],"open-source":[144],"license":[145],"promote":[147],"adoption":[148],"accelerate":[150],"research":[151],"toward":[152],"establishing":[153],"as":[155],"practical":[157],"axis":[158],"improving":[160],"efficiency":[162],"scalability":[164],"foundation":[167],"models.":[168]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
