{"id":"https://openalex.org/W7161826946","doi":"https://doi.org/10.48550/arxiv.2605.18795","title":"HELLoRA: Hot Experts Layer-Level Low-Rank Adaptation for Mixture-of-Experts Models","display_name":"HELLoRA: Hot Experts Layer-Level Low-Rank Adaptation for Mixture-of-Experts Models","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7161826946","doi":"https://doi.org/10.48550/arxiv.2605.18795"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18795","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18795","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18795","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136531467","display_name":"Jia Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Jia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136564069","display_name":"Zhonghao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhonghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136551736","display_name":"Ping Chen","orcid":"https://orcid.org/0000-0001-6707-5419"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136608198","display_name":"Qianyang li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"li, Qianyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000130442","display_name":"Yancheng Pan","orcid":"https://orcid.org/0000-0002-3827-8716"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Yancheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136557580","display_name":"Shaoxun Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shaoxun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113095185","display_name":"Ziyi Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Ziyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5107808752","display_name":"Longxiang Wang","orcid":"https://orcid.org/0009-0005-1403-0811"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Longxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.4523000121116638,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.4523000121116638,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.14839999377727509,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07609999924898148,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.751800000667572},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.6294000148773193},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5508000254631042},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.49729999899864197},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4537999927997589},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45170000195503235},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.42730000615119934}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.805400013923645},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.751800000667572},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.6294000148773193},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5508000254631042},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4925999939441681},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45170000195503235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4406000077724457},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.36959999799728394},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3578000068664551},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2784000039100647},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18795","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18795","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18795","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18795","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Low-Rank":[0,41],"Adaptation":[1,42],"(LoRA)":[2],"dominates":[3],"parameter-efficient":[4],"fine-tuning":[5],"of":[6,78,146,174],"large":[7],"language":[8,196],"models,":[9],"yet":[10],"most":[11,51],"variants":[12],"target":[13],"dense":[14],"architectures.":[15],"Mixture-of-Experts":[16],"(MoE)":[17],"models":[18],"scale":[19],"parameters":[20,63],"at":[21,55],"near-constant":[22],"per-token":[23],"compute,":[24],"and":[25,64,106,117,119,128,160,188],"their":[26],"sparse":[27],"activation":[28],"patterns":[29],"create":[30],"untapped":[31],"opportunities":[32],"for":[33,194],"more":[34],"efficient":[35],"adaptation.":[36],"We":[37],"propose":[38],"Hot-Experts":[39],"Layer-level":[40],"(HELLoRA),":[43],"which":[44,102],"attaches":[45],"LoRA":[46,140,169],"modules":[47],"only":[48,172],"to":[49,75,99,138,191],"the":[50,104,108,147,157],"frequently":[52],"activated":[53],"experts":[54],"each":[56],"layer.":[57],"This":[58],"simple":[59],"mechanism":[60],"reduces":[61,150],"trainable":[62,148,176],"adapter-induced":[65],"FLOPs":[66,152],"while":[67,170],"improving":[68],"downstream":[69],"performance,":[70],"an":[71,186],"effect":[72],"we":[73,93],"attribute":[74],"a":[76],"form":[77,100],"structured":[79],"regularization":[80],"that":[81,181],"preserves":[82],"pretrained":[83],"expert":[84],"specialization.":[85],"To":[86],"stress-test":[87],"HELLoRA":[88,131,143,167],"under":[89],"extreme":[90],"parameter":[91],"budgets,":[92],"further":[94],"compose":[95],"it":[96],"with":[97],"LoRI":[98],"HELLoRI,":[101],"freezes":[103],"up-projection":[105],"sparsifies":[107],"down-projection.":[109],"Across":[110],"three":[111,120],"MoE":[112,195],"backbones,":[113],"namely":[114],"OlMoE-1B-7B,":[115],"Mixtral-8x7B,":[116],"DeepSeekMoE,":[118,166],"task":[121],"families":[122],"covering":[123],"mathematical":[124],"reasoning,":[125],"code":[126],"generation,":[127],"safety":[129],"alignment,":[130],"consistently":[132],"outperforms":[133,168],"strong":[134],"PEFT":[135,193],"baselines.":[136],"Relative":[137],"vanilla":[139],"on":[141],"OlMoE,":[142],"uses":[144],"15.7%":[145],"parameters,":[149],"adapter":[151,183],"by":[153,163],"38.7%,":[154],"achieves":[155],"1.9x":[156],"training":[158],"throughput,":[159],"improves":[161],"accuracy":[162],"9.2%.":[164],"On":[165],"using":[171],"23.2%":[173],"its":[175],"parameters.":[177],"These":[178],"results":[179],"demonstrate":[180],"activation-aware":[182],"placement":[184],"is":[185],"effective":[187],"practical":[189],"route":[190],"scaling":[192],"models.":[197]},"counts_by_year":[],"updated_date":"2026-07-01T08:55:40.977307","created_date":"2026-05-21T00:00:00"}
