{"id":"https://openalex.org/W4415907976","doi":"https://doi.org/10.48550/arxiv.2504.11409","title":"Minitron-SSM: Efficient Hybrid Language Model Compression through Group-Aware SSM Pruning","display_name":"Minitron-SSM: Efficient Hybrid Language Model Compression through Group-Aware SSM Pruning","publication_year":2025,"publication_date":"2025-04-15","ids":{"openalex":"https://openalex.org/W4415907976","doi":"https://doi.org/10.48550/arxiv.2504.11409"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2504.11409","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.11409","pdf_url":"https://arxiv.org/pdf/2504.11409","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.11409","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089955412","display_name":"Ali Taghibakhshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taghibakhshi, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084832562","display_name":"Sharath Turuvekere Sreenivas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sreenivas, Sharath Turuvekere","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099241641","display_name":"Saurav Muralidharan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muralidharan, Saurav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107404727","display_name":"Marcin Chochowski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chochowski, Marcin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037977602","display_name":"Yashaswi Karnati","orcid":"https://orcid.org/0000-0002-2512-1250"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karnati, Yashaswi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107957234","display_name":"Raviraj Joshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joshi, Raviraj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063510148","display_name":"Ameya Sunil Mahabaleshwarkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahabaleshwarkar, Ameya Sunil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065367280","display_name":"Zijia Chen","orcid":"https://orcid.org/0009-0009-6303-5095"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zijia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114761515","display_name":"Yoshi Suhara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suhara, Yoshi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005526391","display_name":"Oluwatobi Olabiyi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olabiyi, Oluwatobi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119979756","display_name":"Daniel Korzekwa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Korzekwa, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031170568","display_name":"Mostofa Patwary","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patwary, Mostofa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072436307","display_name":"Mohammad Shoeybi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shoeybi, Mohammad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056503617","display_name":"Jan Kautz","orcid":"https://orcid.org/0000-0002-8830-429X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kautz, Jan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066242985","display_name":"Bryan Catanzaro","orcid":"https://orcid.org/0000-0003-0034-7728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Catanzaro, Bryan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001012548","display_name":"Ashwath Aithal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aithal, Ashwath","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011019569","display_name":"Nima Tajbakhsh","orcid":"https://orcid.org/0000-0001-8614-4811"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tajbakhsh, Nima","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5066945976","display_name":"Pavlo Molchanov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Molchanov, Pavlo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2214999943971634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2214999943971634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.19760000705718994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.125900000333786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.7929999828338623},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.661899983882904},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.6187999844551086},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.45969998836517334},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.43709999322891235},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4228000044822693},{"id":"https://openalex.org/keywords/hybrid-system","display_name":"Hybrid system","score":0.3912000060081482},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.376800000667572}],"concepts":[{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.7929999828338623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7293999791145325},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.661899983882904},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.6187999844551086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46059998869895935},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.45969998836517334},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.43709999322891235},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4361000061035156},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4228000044822693},{"id":"https://openalex.org/C50897621","wikidata":"https://www.wikidata.org/wiki/Q2665508","display_name":"Hybrid system","level":2,"score":0.3912000060081482},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.376800000667572},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33570000529289246},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C2778671685","wikidata":"https://www.wikidata.org/wiki/Q219239","display_name":"Recipe","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.27230000495910645},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.26420000195503235},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2504.11409","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.11409","pdf_url":"https://arxiv.org/pdf/2504.11409","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"doi:10.48550/arxiv.2504.11409","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.11409","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.11409","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.11409","pdf_url":"https://arxiv.org/pdf/2504.11409","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415907976.pdf","grobid_xml":"https://content.openalex.org/works/W4415907976.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Hybrid":[0,50,121],"LLM":[1],"architectures":[2],"that":[3,21,59],"combine":[4],"Attention":[5],"and":[6,14,24,67,85,100],"State":[7],"Space":[8],"Models":[9],"(SSMs)":[10],"achieve":[11,82],"state-of-the-art":[12],"accuracy":[13,84,139],"runtime":[15],"performance.":[16],"Recent":[17],"work":[18],"has":[19],"demonstrated":[20],"applying":[22],"compression":[23,93],"distillation":[25],"to":[26,81,89,109,124,129],"Attention-only":[27],"models":[28,33,142],"yields":[29],"smaller,":[30],"more":[31],"accurate":[32],"at":[34],"a":[35,54],"fraction":[36],"of":[37,48,64,77,140],"the":[38,46,61,75,110,118,138,150],"training":[39,132],"cost.":[40],"In":[41],"this":[42,114],"work,":[43],"we":[44,73,116],"explore":[45],"effectiveness":[47],"compressing":[49],"architectures.":[51],"We":[52],"introduce":[53],"novel":[55],"group-aware":[56],"pruning":[57,80],"strategy":[58],"preserves":[60],"structural":[62],"integrity":[63],"SSM":[65,79],"blocks":[66],"their":[68],"sequence":[69],"modeling":[70],"capabilities.":[71],"Furthermore,":[72],"demonstrate":[74],"necessity":[76],"such":[78],"improved":[83],"inference":[86],"speed":[87],"compared":[88],"traditional":[90],"approaches.":[91],"Our":[92],"recipe":[94],"combines":[95],"SSM,":[96],"FFN,":[97],"embedding":[98],"dimension,":[99],"layer":[101],"pruning,":[102],"followed":[103],"by":[104],"knowledge":[105],"distillation-based":[106],"retraining,":[107],"similar":[108],"MINITRON":[111],"technique.":[112],"Using":[113],"approach,":[115],"compress":[117],"Nemotron-H":[119],"8B":[120],"model":[122,136],"down":[123],"4B":[125],"parameters":[126],"with":[127],"up":[128],"40x":[130],"fewer":[131],"tokens.":[133],"The":[134],"resulting":[135],"surpasses":[137],"similarly-sized":[141],"while":[143],"achieving":[144],"2x":[145],"faster":[146],"inference,":[147],"significantly":[148],"advancing":[149],"Pareto":[151],"frontier.":[152]},"counts_by_year":[],"updated_date":"2026-06-16T09:24:06.705377","created_date":"2025-10-10T00:00:00"}
