{"id":"https://openalex.org/W7136873600","doi":"https://doi.org/10.48550/arxiv.2603.13213","title":"MoEKD: Mixture-of-Experts Knowledge Distillation for Robust and High-Performing Compressed Code Models","display_name":"MoEKD: Mixture-of-Experts Knowledge Distillation for Robust and High-Performing Compressed Code Models","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136873600","doi":"https://doi.org/10.48550/arxiv.2603.13213"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13213","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13213","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13213","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129520321","display_name":"Md. Abdul Awal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Awal, Md. Abdul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046862791","display_name":"Mrigank Rochan","orcid":"https://orcid.org/0000-0001-9513-6573"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rochan, Mrigank","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129596512","display_name":"Chanchal K. Roy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy, Chanchal K.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.8956999778747559,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.8956999778747559,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.018400000408291817,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.017799999564886093,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.7014999985694885},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4878999888896942},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.45980000495910645},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.412200003862381},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.3765000104904175},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.36980000138282776},{"id":"https://openalex.org/keywords/fractionating-column","display_name":"Fractionating column","score":0.3553999960422516},{"id":"https://openalex.org/keywords/knowledge-engineering","display_name":"Knowledge engineering","score":0.35440000891685486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7378000020980835},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.7014999985694885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5099999904632568},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4878999888896942},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.45980000495910645},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3968999981880188},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C154030694","wikidata":"https://www.wikidata.org/wiki/Q1436074","display_name":"Fractionating column","level":3,"score":0.3553999960422516},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3547999858856201},{"id":"https://openalex.org/C84685590","wikidata":"https://www.wikidata.org/wiki/Q1540472","display_name":"Knowledge engineering","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.31850001215934753},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C2781411174","wikidata":"https://www.wikidata.org/wiki/Q8034662","display_name":"Workcell","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13213","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13213","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13213","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13213","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8933034539222717,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,61,215],"for":[3],"code":[4],"have":[5],"achieved":[6],"strong":[7],"performance":[8,190,219],"across":[9],"diverse":[10],"software":[11],"analytics":[12],"tasks,":[13],"yet":[14],"their":[15,222],"real-world":[16],"adoption":[17],"remains":[18],"limited":[19],"by":[20,40,182,191,226],"high":[21],"computational":[22],"demands,":[23],"slow":[24],"inference":[25],"speeds,":[26],"significant":[27],"energy":[28],"consumption,":[29],"and":[30,50,92,122,141,153,169,202],"environmental":[31],"impact.":[32],"Knowledge":[33,104],"distillation":[34,75,87,137,154],"(KD)":[35],"offers":[36],"a":[37,44,48,64,82,107,112,131,149],"practical":[38],"solution":[39],"transferring":[41,90],"knowledge":[42,124,147,212,237],"from":[43,63,126,155],"large":[45],"model":[46],"to":[47,118,184,193,196,216],"smaller":[49],"more":[51,120],"efficient":[52],"model.":[53,133],"Despite":[54],"its":[55],"effectiveness,":[56],"recent":[57],"studies":[58],"show":[59,174],"that":[60,110,175,209],"distilled":[62],"single":[65],"source":[66],"often":[67],"exhibit":[68],"degraded":[69],"adversarial":[70,180],"robustness,":[71],"even":[72,220],"when":[73,221],"robustness-aware":[74],"techniques":[76],"are":[77],"employed.":[78],"These":[79],"observations":[80],"suggest":[81],"fundamental":[83],"limitation":[84],"of":[85,102,114,145,235,243],"single-source":[86,245],"in":[88,239],"simultaneously":[89],"high-quality":[91],"robust":[93,123],"knowledge.":[94,158],"To":[95],"overcome":[96],"this":[97],"limitation,":[98],"we":[99],"propose":[100],"Mixture":[101,113],"Experts":[103,115],"Distillation":[105],"(MoEKD),":[106],"KD":[108,198,246],"framework":[109],"leverages":[111],"(MoE)":[116],"architecture":[117],"enable":[119],"effective":[121],"transfer":[125],"multiple":[127],"specialized":[128],"experts":[129],"into":[130,139],"compact":[132],"MoEKD":[134,161,176],"decomposes":[135],"the":[136,156,163,233],"process":[138],"expert":[140,146,211],"router":[142],"training,":[143],"aggregation":[144,238],"through":[148],"learned":[150],"routing":[151],"mechanism,":[152],"aggregated":[157],"We":[159],"evaluate":[160],"on":[162],"vulnerability":[164],"detection":[165],"task":[166],"using":[167],"CodeBERT":[168],"GraphCodeBERT":[170],"models.":[171],"Experimental":[172],"results":[173,231],"not":[177],"only":[178],"improves":[179],"robustness":[181],"up":[183,192],"35.8%,":[185],"but":[186],"also":[187],"enhances":[188],"predictive":[189],"13%,":[194],"compared":[195],"state-of-the-art":[197],"baselines,":[199],"including":[200],"Compressor":[201],"AVATAR.":[203],"Furthermore,":[204],"an":[205],"ablation":[206],"study":[207],"demonstrates":[208],"aggregating":[210],"enables":[213],"ultra-compact":[214],"maintain":[217],"competitive":[218],"size":[223],"is":[224],"reduced":[225],"approximately":[227],"half.":[228],"Overall,":[229],"these":[230],"highlight":[232],"effectiveness":[234],"multi-expert":[236],"addressing":[240],"key":[241],"limitations":[242],"existing":[244],"approaches.":[247]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-17T00:00:00"}
