{"id":"https://openalex.org/W4402167571","doi":"https://doi.org/10.1109/tetci.2024.3418837","title":"ARC: A Layer Replacement Compression Method Based on Fine-Grained Self-Attention Distillation for Compressing Pre-Trained Language Models","display_name":"ARC: A Layer Replacement Compression Method Based on Fine-Grained Self-Attention Distillation for Compressing Pre-Trained Language Models","publication_year":2024,"publication_date":"2024-09-03","ids":{"openalex":"https://openalex.org/W4402167571","doi":"https://doi.org/10.1109/tetci.2024.3418837"},"language":"en","primary_location":{"id":"doi:10.1109/tetci.2024.3418837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3418837","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009322313","display_name":"Daohan Yu","orcid":"https://orcid.org/0009-0007-6675-6512"},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daohan Yu","raw_affiliation_strings":["Shandong University of Science and Technology, Qingdao, China"],"raw_orcid":"https://orcid.org/0009-0007-6675-6512","affiliations":[{"raw_affiliation_string":"Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054695808","display_name":"Liqing Qiu","orcid":"https://orcid.org/0000-0002-9184-2742"},"institutions":[{"id":"https://openalex.org/I80143920","display_name":"Shandong University of Science and Technology","ror":"https://ror.org/04gtjhw98","country_code":"CN","type":"education","lineage":["https://openalex.org/I80143920"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqing Qiu","raw_affiliation_strings":["Shandong University of Science and Technology, Qingdao, China"],"raw_orcid":"https://orcid.org/0000-0002-9184-2742","affiliations":[{"raw_affiliation_string":"Shandong University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I80143920"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I80143920"],"apc_list":null,"apc_paid":null,"fwci":0.2986,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.63916812,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"9","issue":"1","first_page":"848","last_page":"860"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.930400013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.7005693912506104},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5837398767471313},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5460060834884644},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5239298939704895},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.2732057571411133},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.19293907284736633},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.09728971123695374},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.07038497924804688}],"concepts":[{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.7005693912506104},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5837398767471313},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5460060834884644},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5239298939704895},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.2732057571411133},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.19293907284736633},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.09728971123695374},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.07038497924804688}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetci.2024.3418837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3418837","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1566289585","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2963350559","https://openalex.org/W2970454332","https://openalex.org/W2972324944","https://openalex.org/W2997006708","https://openalex.org/W3034457371","https://openalex.org/W3101248447","https://openalex.org/W3105966348","https://openalex.org/W3174544005","https://openalex.org/W4206913945","https://openalex.org/W4224075564","https://openalex.org/W4380993498","https://openalex.org/W4382463788","https://openalex.org/W4385245566","https://openalex.org/W4385571831","https://openalex.org/W6631190155","https://openalex.org/W6637551013","https://openalex.org/W6637709462","https://openalex.org/W6638523607","https://openalex.org/W6683826617","https://openalex.org/W6729471031","https://openalex.org/W6730179637","https://openalex.org/W6760732026","https://openalex.org/W6762493409","https://openalex.org/W6762945437","https://openalex.org/W6767719158","https://openalex.org/W6768851824","https://openalex.org/W6773815586","https://openalex.org/W6775706467"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"The":[0],"primary":[1],"objective":[2],"of":[3,11,33,108,131,148,171,179,192,216],"model":[4,14,34,72,83,111,152,172],"compression":[5,35,84,156,162,173],"is":[6],"to":[7,37,53,73],"maintain":[8],"the":[9,12,27,31,71,105,109,117,128,132,137,145,149,154,168,177,180,196,203],"performance":[10],"original":[13,110],"while":[15],"reducing":[16],"its":[17,38],"size":[18],"as":[19,21],"much":[20],"possible.":[22],"Knowledge":[23],"distillation":[24,44,115,119,187],"has":[25],"become":[26],"mainstream":[28],"method":[29,85,102,205],"in":[30,69,116,153,214],"field":[32],"due":[36],"excellent":[39],"performance.":[40],"However,":[41],"current":[42],"knowledge":[43,56,147,186],"methods":[45,62,188],"for":[46,189],"medium":[47],"and":[48,218],"small":[49],"pre-trained":[50,59,65,151,190],"models":[51,66,191],"struggle":[52],"effectively":[54],"extract":[55],"from":[57],"large":[58,64,133,150],"models.":[60],"Similarly,":[61],"targeting":[63],"face":[67],"challenges":[68],"compressing":[70],"a":[74,81],"smaller":[75],"scale.":[76],"Therefore,":[77],"this":[78],"paper":[79,184],"proposes":[80],"new":[82],"called":[86],"Attention-based":[87],"Replacement":[88],"Compression":[89],"(ARC),":[90],"which":[91],"introduces":[92],"layer":[93],"random":[94,140],"replacement":[95,141],"based":[96],"on":[97,195],"fine-grained":[98,113],"self-attention":[99,114],"distillation.":[100],"This":[101,183],"first":[103],"obtains":[104],"important":[106],"features":[107],"through":[112],"pre-training":[118],"stage.":[120,157],"More":[121],"information":[122],"can":[123],"be":[124],"obtained":[125],"by":[126],"extracting":[127],"upper":[129],"layers":[130],"teacher":[134],"model.":[135,182],"Then,":[136],"one-to-one":[138],"Transformer-layer":[139],"training":[142,169],"fully":[143],"explores":[144],"hidden":[146],"fine-tuning":[155],"Compared":[158],"with":[159],"other":[160],"complex":[161],"methods,":[163],"ARC":[164],"not":[165],"only":[166],"simplifies":[167],"process":[170],"but":[174],"also":[175],"enhances":[176],"applicability":[178],"compressed":[181],"compares":[185],"different":[193,210],"sizes":[194],"GLUE":[197],"benchmark.":[198],"Experimental":[199],"results":[200],"demonstrate":[201],"that":[202],"proposed":[204],"achieves":[206],"significant":[207],"improvements":[208],"across":[209],"parameter":[211],"scales,":[212],"especially":[213],"terms":[215],"accuracy":[217],"inference":[219],"speed.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
