{"id":"https://openalex.org/W4403795764","doi":"https://doi.org/10.48550/arxiv.2409.17481","title":"MaskLLM: Learnable Semi-Structured Sparsity for Large Language Models","display_name":"MaskLLM: Learnable Semi-Structured Sparsity for Large Language Models","publication_year":2024,"publication_date":"2024-09-26","ids":{"openalex":"https://openalex.org/W4403795764","doi":"https://doi.org/10.48550/arxiv.2409.17481"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2409.17481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.17481","pdf_url":"https://arxiv.org/pdf/2409.17481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2409.17481","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102483331","display_name":"Gongfan Fang","orcid":"https://orcid.org/0009-0009-6935-0432"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fang, Gongfan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108868459","display_name":"Hongxu Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Hongxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099241641","display_name":"Saurav Muralidharan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muralidharan, Saurav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030133314","display_name":"Greg Heinrich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heinrich, Greg","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063722719","display_name":"Jeff Pool","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pool, Jeff","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056503617","display_name":"Jan Kautz","orcid":"https://orcid.org/0000-0002-8830-429X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kautz, Jan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066945976","display_name":"Pavlo Molchanov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molchanov, Pavlo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5015574447","display_name":"Xinchao Wang","orcid":"https://orcid.org/0000-0003-0057-1404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xinchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5102483331"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9038000106811523,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5299333930015564},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4169473350048065},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39002537727355957},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.13786965608596802}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5299333930015564},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4169473350048065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39002537727355957},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13786965608596802}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2409.17481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.17481","pdf_url":"https://arxiv.org/pdf/2409.17481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2409.17481","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2409.17481","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2409.17481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.17481","pdf_url":"https://arxiv.org/pdf/2409.17481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403795764.pdf","grobid_xml":"https://content.openalex.org/works/W4403795764.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"distinguished":[5],"by":[6,169],"their":[7],"massive":[8],"parameter":[9],"counts,":[10],"which":[11],"typically":[12],"result":[13],"in":[14,31],"significant":[15],"redundancy.":[16],"This":[17,60],"work":[18],"introduces":[19],"MaskLLM,":[20],"a":[21,43,53,144,163],"learnable":[22,54,178],"pruning":[23],"method":[24,78],"that":[25],"establishes":[26],"Semi-structured":[27],"(or":[28],"``N:M'')":[29],"Sparsity":[30],"LLMs,":[32,115],"aimed":[33],"at":[34,197],"reducing":[35],"computational":[36],"overhead":[37],"during":[38],"inference.":[39],"Instead":[40],"of":[41,94,101,147,186],"developing":[42],"new":[44],"importance":[45],"criterion,":[46],"MaskLLM":[47,109,161],"explicitly":[48],"models":[49],"N:M":[50],"patterns":[51],"as":[52],"distribution":[55,96],"through":[56],"Gumbel":[57],"Softmax":[58],"sampling.":[59],"approach":[61],"facilitates":[62],"end-to-end":[63],"training":[64],"on":[65,113,151],"large-scale":[66],"datasets":[67,83],"and":[68,84,119,129],"offers":[69],"two":[70],"notable":[71],"advantages:":[72],"1)":[73],"High-quality":[74],"Masks":[75],"-":[76,90],"our":[77,130],"effectively":[79],"scales":[80],"to":[81,126,154,189],"large":[82],"learns":[85],"accurate":[86],"masks;":[87],"2)":[88],"Transferability":[89],"the":[91,98,155,171],"probabilistic":[92],"modeling":[93],"mask":[95],"enables":[97],"transfer":[99],"learning":[100,170],"sparsity":[102,112,188],"across":[103],"domains":[104],"or":[105,149,192],"tasks.":[106],"We":[107],"assessed":[108],"using":[110],"2:4":[111,187],"various":[114],"including":[116],"LLaMA-2,":[117],"Nemotron-4,":[118],"GPT-3,":[120],"with":[121,173],"sizes":[122],"ranging":[123],"from":[124],"843M":[125],"15B":[127],"parameters,":[128],"empirical":[131],"results":[132],"show":[133],"substantial":[134],"improvements":[135],"over":[136],"state-of-the-art":[137],"methods.":[138],"For":[139],"instance,":[140],"leading":[141],"approaches":[142],"achieve":[143],"perplexity":[145],"(PPL)":[146],"10":[148],"greater":[150],"Wikitext":[152],"compared":[153],"dense":[156],"model's":[157],"5.12":[158],"PPL,":[159],"but":[160],"achieves":[162],"significantly":[164],"lower":[165],"6.72":[166],"PPL":[167],"solely":[168],"masks":[172,182],"frozen":[174],"weights.":[175],"Furthermore,":[176],"MaskLLM's":[177],"nature":[179],"allows":[180],"customized":[181],"for":[183],"lossless":[184],"application":[185],"downstream":[190],"tasks":[191],"domains.":[193],"Code":[194],"is":[195],"available":[196],"https://github.com/NVlabs/MaskLLM.":[198]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
