{"id":"https://openalex.org/W4402683876","doi":"https://doi.org/10.18653/v1/2024.findings-acl.582","title":"Pruning Large Language Models to Intra-module Low-rank Architecture with Transitional Activations","display_name":"Pruning Large Language Models to Intra-module Low-rank Architecture with Transitional Activations","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402683876","doi":"https://doi.org/10.18653/v1/2024.findings-acl.582"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.findings-acl.582","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.582","pdf_url":"https://aclanthology.org/2024.findings-acl.582.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-acl.582.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053914891","display_name":"Bowen Shen","orcid":"https://orcid.org/0000-0002-2298-6857"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bowen Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067997634","display_name":"Zheng Lin","orcid":"https://orcid.org/0000-0002-8432-1658"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051666209","display_name":"Daren Zha","orcid":"https://orcid.org/0009-0002-6042-3454"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daren Zha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108336164","display_name":"Wei Liu","orcid":"https://orcid.org/0009-0006-3943-9493"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054843960","display_name":"Jian Luan","orcid":"https://orcid.org/0000-0002-2383-226X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Luan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059575276","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0003-4748-6426"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100442323","display_name":"Weiping Wang","orcid":"https://orcid.org/0000-0003-3237-8107"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weiping Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5053914891"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14043609,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9781","last_page":"9793"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9064000248908997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9064000248908997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7527387142181396},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.6749704480171204},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6402420997619629},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.6315310001373291},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44903987646102905},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4379822015762329},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4214095175266266},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4212409257888794},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3794938325881958},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11803844571113586}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7527387142181396},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.6749704480171204},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6402420997619629},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.6315310001373291},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44903987646102905},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4379822015762329},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4214095175266266},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4212409257888794},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3794938325881958},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11803844571113586},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-acl.582","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.582","pdf_url":"https://aclanthology.org/2024.findings-acl.582.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-acl.582","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.582","pdf_url":"https://aclanthology.org/2024.findings-acl.582.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402683876.pdf","grobid_xml":"https://content.openalex.org/works/W4402683876.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2373300491","https://openalex.org/W2378744544","https://openalex.org/W2594301978","https://openalex.org/W2379704676","https://openalex.org/W1998810860","https://openalex.org/W4206442282","https://openalex.org/W2384505857","https://openalex.org/W2355171581","https://openalex.org/W2145253956","https://openalex.org/W2038503502"],"abstract_inverted_index":{"Structured":[0],"pruning":[1,39,66,153],"fundamentally":[2],"reduces":[3,79],"computational":[4],"and":[5,13,26,33,86,115,124,143,154,163],"memory":[6],"overheads":[7],"of":[8,132,150,161],"large":[9,41],"language":[10],"models":[11,23],"(LLMs)":[12],"offers":[14],"a":[15,49,57,63,70],"feasible":[16],"solution":[17],"for":[18,53],"end-side":[19],"LLM":[20,102],"deployment.Structurally":[21],"pruned":[22,104],"remain":[24],"dense":[25],"highprecision,":[27],"highly":[28,45],"compatible":[29],"with":[30,69,138],"further":[31],"tuning":[32],"compression.However,":[34],"as":[35],"the":[36,44,93,101,121,130,148,159],"coarsegrained":[37],"structured":[38,65],"poses":[40],"damage":[42],"to":[43,99,140],"interconnected":[46],"model,":[47],"achieving":[48],"high":[50,136],"compression":[51,137],"ratio":[52],"scaled-up":[54],"LLMs":[55],"remains":[56],"challenge.In":[58],"this":[59],"paper,":[60],"we":[61],"introduce":[62],"task-agnostic":[64],"approach":[67,134],"coupled":[68],"compact":[71],"Transformer":[72],"architecture":[73],"design.The":[74],"proposed":[75],"approach,":[76],"named":[77],"TransAct,":[78],"transitional":[80],"activations":[81,95],"inside":[82],"multi-head":[83],"attention":[84,116],"(MHA)":[85],"multi-layer":[87],"perceptron":[88],"(MLP)":[89],"modules,":[90],"while":[91],"preserving":[92],"inter-module":[94],"that":[96],"are":[97],"sensitive":[98],"perturbations.Hence,":[100],"is":[103,118],"into":[105],"an":[106],"intra-module":[107],"low-rank":[108],"architecture,":[109],"significantly":[110],"reducing":[111],"weights,":[112],"KV":[113],"Cache":[114],"computation.Trans-Act":[117],"implemented":[119],"on":[120,126,158],"LLaMA":[122],"model":[123],"evaluated":[125],"downstream":[127],"benchmarks.Results":[128],"verify":[129],"optimality":[131],"our":[133],"at":[135],"respect":[139],"both":[141],"efficiency":[142],"performance.Further,":[144],"ablation":[145],"studies":[146],"reveal":[147],"strength":[149],"activation-guided":[151],"iterative":[152],"provide":[155],"experimental":[156],"analysis":[157],"redundancy":[160],"MHA":[162],"MLP":[164],"modules.":[165]},"counts_by_year":[],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2025-10-10T00:00:00"}
