{"id":"https://openalex.org/W4405259982","doi":"https://doi.org/10.48550/arxiv.2412.07405","title":"MoDULA: Mixture of Domain-Specific and Universal LoRA for Multi-Task Learning","display_name":"MoDULA: Mixture of Domain-Specific and Universal LoRA for Multi-Task Learning","publication_year":2024,"publication_date":"2024-12-10","ids":{"openalex":"https://openalex.org/W4405259982","doi":"https://doi.org/10.48550/arxiv.2412.07405"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.07405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.07405","pdf_url":"https://arxiv.org/pdf/2412.07405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.07405","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068840674","display_name":"Yufei Ma","orcid":"https://orcid.org/0000-0002-2670-524X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ma, Yufei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057994068","display_name":"Zihan Liang","orcid":"https://orcid.org/0009-0003-2373-1439"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Zihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111142513","display_name":"H.-F. Dai","orcid":"https://orcid.org/0000-0002-3844-8359"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Huangyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070051912","display_name":"Ben Chen","orcid":"https://orcid.org/0000-0003-4495-8686"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ben","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062149619","display_name":"Dehong Gao","orcid":"https://orcid.org/0000-0002-6636-5702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Dehong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114847667","display_name":"Zhuoran Ran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ran, Zhuoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zihan, Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zihan, Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075971838","display_name":"Linbo Jin","orcid":"https://orcid.org/0000-0003-3201-2113"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Linbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041289658","display_name":"Wen Jiang","orcid":"https://orcid.org/0000-0001-5429-2748"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Wen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031338085","display_name":"Guannan Zhang","orcid":"https://orcid.org/0000-0002-7091-2318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Guannan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078351384","display_name":"Xiaoyan Cai","orcid":"https://orcid.org/0000-0002-1406-107X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Xiaoyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100527962","display_name":"Libin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Libin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5068840674"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7778000235557556,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7778000235557556,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7464250326156616},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6746069192886353},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5765594244003296},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.4772734045982361},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4236794710159302},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33196982741355896},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13027846813201904},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.09520813822746277},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0872824490070343}],"concepts":[{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7464250326156616},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6746069192886353},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5765594244003296},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.4772734045982361},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4236794710159302},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33196982741355896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13027846813201904},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.09520813822746277},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0872824490070343},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2412.07405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.07405","pdf_url":"https://arxiv.org/pdf/2412.07405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.07405","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.07405","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.07405","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.07405","pdf_url":"https://arxiv.org/pdf/2412.07405","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1160214813","display_name":null,"funder_award_id":"62372380","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G192882225","display_name":null,"funder_award_id":"U22B2036","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3448811327","display_name":null,"funder_award_id":"62103374","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4068074153","display_name":null,"funder_award_id":"2024C01025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4405259982.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474"],"abstract_inverted_index":{"The":[0,67,110],"growing":[1],"demand":[2],"for":[3,16,58,160,194],"larger-scale":[4],"models":[5],"in":[6,29,64,140],"the":[7,71,75,92,97,115,119,161],"development":[8],"of":[9,74,118,126,164],"\\textbf{L}arge":[10],"\\textbf{L}anguage":[11],"\\textbf{M}odels":[12],"(LLMs)":[13],"poses":[14],"challenges":[15],"efficient":[17,162],"training":[18,37,78,145,175,182],"within":[19,91],"limited":[20],"computational":[21],"resources.":[22,38],"Traditional":[23],"fine-tuning":[24,60,128,195],"methods":[25,123,129],"often":[26],"exhibit":[27],"instability":[28],"multi-task":[30,65,72],"learning":[31],"and":[32,46,61,83,104,121,184,201],"rely":[33],"heavily":[34],"on":[35,130],"extensive":[36],"Here,":[39],"we":[40],"propose":[41],"MoDULA":[42,93,155,188],"(\\textbf{M}ixture":[43],"\\textbf{o}f":[44],"\\textbf{D}omain-Specific":[45],"\\textbf{U}niversal":[47],"\\textbf{L}oR\\textbf{A}),":[48],"a":[49,88,190],"novel":[50],"\\textbf{P}arameter":[51],"\\textbf{E}fficient":[52],"\\textbf{F}ine-\\textbf{T}uning":[53],"(PEFT)":[54],"\\textbf{M}ixture-\\textbf{o}f-\\textbf{E}xpert":[55],"(MoE)":[56],"paradigm":[57,68,176],"improved":[59],"parameter":[62,199],"efficiency":[63,183,200],"learning.":[66],"effectively":[69],"improves":[70],"capability":[73,100],"model":[76,185],"by":[77,101,147],"universal":[79,103],"experts,":[80,82],"domain-specific":[81],"routers":[84],"separately.":[85],"MoDULA-Res":[86,122,134],"is":[87],"new":[89,165],"method":[90],"paradigm,":[94],"which":[95],"maintains":[96],"model's":[98],"general":[99,152],"connecting":[102],"task-specific":[105],"experts":[106,170],"through":[107],"residual":[108],"connections.":[109],"experimental":[111],"results":[112],"demonstrate":[113],"that":[114,125],"overall":[116],"performance":[117,138],"MoDULA-Flan":[120],"surpasses":[124],"existing":[127,169],"various":[131],"LLMs.":[132],"Notably,":[133],"achieves":[135],"more":[136],"significant":[137],"improvements":[139],"multiple":[141],"tasks":[142,166],"while":[143],"reducing":[144],"costs":[146],"over":[148],"80\\%":[149],"without":[150,167],"losing":[151],"capability.":[153,203],"Moreover,":[154],"displays":[156],"flexible":[157],"pluggability,":[158],"allowing":[159],"addition":[163],"retraining":[168],"from":[171],"scratch.":[172],"This":[173],"progressive":[174],"circumvents":[177],"data":[178],"balancing":[179],"issues,":[180],"enhancing":[181],"stability.":[186],"Overall,":[187],"provides":[189],"scalable,":[191],"cost-effective":[192],"solution":[193],"LLMs":[196],"with":[197],"enhanced":[198],"generalization":[202]},"counts_by_year":[],"updated_date":"2026-05-10T08:33:47.465468","created_date":"2024-12-12T00:00:00"}
