{"id":"https://openalex.org/W4390393202","doi":"https://doi.org/10.1145/3639364","title":"Explanation Guided Knowledge Distillation for Pre-trained Language Model Compression","display_name":"Explanation Guided Knowledge Distillation for Pre-trained Language Model Compression","publication_year":2023,"publication_date":"2023-12-29","ids":{"openalex":"https://openalex.org/W4390393202","doi":"https://doi.org/10.1145/3639364"},"language":"en","primary_location":{"id":"doi:10.1145/3639364","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3639364","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030829461","display_name":"Zhao Yang","orcid":"https://orcid.org/0000-0003-2816-6486"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhao Yang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2816-6486","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102811282","display_name":"Yuanzhe Zhang","orcid":"https://orcid.org/0000-0001-9905-9501"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanzhe Zhang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9905-9501","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031774490","display_name":"Dianbo Sui","orcid":"https://orcid.org/0000-0002-5200-2265"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dianbo Sui","raw_affiliation_strings":["School of Computer Science, Harbin Institute of Technology at Weihai, China"],"raw_orcid":"https://orcid.org/0000-0002-5200-2265","affiliations":[{"raw_affiliation_string":"School of Computer Science, Harbin Institute of Technology at Weihai, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013838295","display_name":"Yiming Ju","orcid":"https://orcid.org/0009-0000-0188-7385"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Ju","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-0188-7385","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110722665","display_name":"Jun Zhao","orcid":"https://orcid.org/0000-0003-3370-2263"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Zhao","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3370-2263","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389900","display_name":"Kang Liu","orcid":"https://orcid.org/0000-0002-6083-8433"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kang Liu","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6083-8433","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China and The Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.487,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72518211,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"23","issue":"2","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.8406150341033936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7768577337265015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5771933197975159},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5692868232727051},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5094292759895325},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.5015428066253662},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08048412203788757}],"concepts":[{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.8406150341033936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7768577337265015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5771933197975159},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5692868232727051},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5094292759895325},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.5015428066253662},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08048412203788757},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3639364","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3639364","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G119245639","display_name":null,"funder_award_id":"62306087","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5381013183","display_name":null,"funder_award_id":"61831022","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5688175948","display_name":null,"funder_award_id":"No. 61831022, No.62276264, and No. 62306087","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6919608582","display_name":null,"funder_award_id":"2022YFF0711900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7899663717","display_name":null,"funder_award_id":"ZR2023QF154","funder_id":"https://openalex.org/F4320324174","funder_display_name":"Natural Science Foundation of Shandong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"},{"id":"https://openalex.org/F4320324174","display_name":"Natural Science Foundation of Shandong Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335892","display_name":"Youth Innovation Promotion Association","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1516184288","https://openalex.org/W1821462560","https://openalex.org/W2169393322","https://openalex.org/W2250539671","https://openalex.org/W2282821441","https://openalex.org/W2562979205","https://openalex.org/W2911785492","https://openalex.org/W2924902521","https://openalex.org/W2949227999","https://openalex.org/W2954929116","https://openalex.org/W2962851944","https://openalex.org/W2963233086","https://openalex.org/W2963341956","https://openalex.org/W2963748441","https://openalex.org/W2963846996","https://openalex.org/W2970454332","https://openalex.org/W2971296908","https://openalex.org/W2978017171","https://openalex.org/W2979691890","https://openalex.org/W2997666887","https://openalex.org/W3101155149","https://openalex.org/W3103884771","https://openalex.org/W3105966348","https://openalex.org/W3110300144","https://openalex.org/W3117378044","https://openalex.org/W3173849000","https://openalex.org/W3174828871","https://openalex.org/W3175175610","https://openalex.org/W3177265267","https://openalex.org/W3195577433","https://openalex.org/W3197901717","https://openalex.org/W3206843525","https://openalex.org/W4205952419","https://openalex.org/W4221139076","https://openalex.org/W4287560008","https://openalex.org/W4293861706","https://openalex.org/W4385245566","https://openalex.org/W6638523607","https://openalex.org/W6739575509","https://openalex.org/W6739901393","https://openalex.org/W6768807518","https://openalex.org/W6800751262"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474"],"abstract_inverted_index":{"Knowledge":[0,76],"distillation":[1,24,99],"is":[2],"widely":[3],"used":[4],"in":[5,79,97,108],"pre-trained":[6],"language":[7],"model":[8,17,26,39,43,52,62,146,175],"compression,":[9],"which":[10,82,141],"can":[11,159],"transfer":[12],"knowledge":[13,23,92],"from":[14],"a":[15,19],"cumbersome":[16],"to":[18,85,121,130,174],"lightweight":[20],"one.":[21],"Though":[22],"based":[25],"compression":[27,176],"has":[28],"achieved":[29],"promising":[30],"performance,":[31],"we":[32,72,101,125],"observe":[33],"that":[34,49,156],"explanations":[35,84,96,133,158],"between":[36],"the":[37,41,50,57,60,65,87,132,144,161,164],"teacher":[38,61],"and":[40,90,116],"student":[42,51,145,165],"are":[44],"not":[45,55],"consistent.":[46],"We":[47],"argue":[48],"should":[53],"study":[54],"only":[56],"predictions":[58],"of":[59,163],"but":[63],"also":[64,171],"internal":[66],"reasoning":[67],"process.":[68],"To":[69,94],"this":[70,80],"end,":[71],"propose":[73,126],"Explanation":[74],"Guided":[75],"Distillation":[77],"(EGKD)":[78],"article,":[81],"utilizes":[83],"represent":[86],"thinking":[88],"process":[89],"improve":[91,122,160],"distillation.":[93],"obtain":[95],"our":[98,168],"framework,":[100],"select":[102],"three":[103,137],"typical":[104],"explanation":[105,139],"methods":[106],"rooted":[107],"different":[109,127,138,178],"mechanisms,":[110],"namely":[111],"gradient-based":[112],",":[113,115],"perturbation-based":[114],"feature":[117],"selection":[118],"methods.":[119],"Then,":[120],"computational":[123],"efficiency,":[124],"optimization":[128],"strategies":[129],"utilize":[131],"obtained":[134],"by":[135],"these":[136],"methods,":[140],"could":[142,170],"provide":[143],"with":[147,177],"better":[148],"learning":[149],"guidance.":[150],"Experimental":[151],"results":[152],"on":[153],"GLUE":[154],"demonstrate":[155],"leveraging":[157],"performance":[162],"model.":[166],"Moreover,":[167],"EGKD":[169],"be":[172],"applied":[173],"architectures.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-17T08:01:34.144755","created_date":"2025-10-10T00:00:00"}
