{"id":"https://openalex.org/W4392384347","doi":"https://doi.org/10.1145/3616855.3635690","title":"Making Small Language Models Better Multi-task Learners with Mixture-of-Task-Adapters","display_name":"Making Small Language Models Better Multi-task Learners with Mixture-of-Task-Adapters","publication_year":2024,"publication_date":"2024-03-04","ids":{"openalex":"https://openalex.org/W4392384347","doi":"https://doi.org/10.1145/3616855.3635690"},"language":"en","primary_location":{"id":"doi:10.1145/3616855.3635690","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113132472","display_name":"Yukang Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yukang Xie","raw_affiliation_strings":["South China University of Technology &amp; Alibaba Group, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-0210-4595","affiliations":[{"raw_affiliation_string":"South China University of Technology &amp; Alibaba Group, Guangzhou, China","institution_ids":["https://openalex.org/I45928872","https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373451","display_name":"Chengyu Wang","orcid":"https://orcid.org/0000-0003-1010-9678"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyu Wang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-1010-9678","affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022633033","display_name":"Junbing Yan","orcid":"https://orcid.org/0009-0001-5588-4793"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]},{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junbing Yan","raw_affiliation_strings":["East China Normal University &amp; Alibaba Group, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-5588-4793","affiliations":[{"raw_affiliation_string":"East China Normal University &amp; Alibaba Group, Shanghai, China","institution_ids":["https://openalex.org/I45928872","https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043241620","display_name":"J Zhou","orcid":"https://orcid.org/0009-0001-2983-6819"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiyong Zhou","raw_affiliation_strings":["South China University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-2983-6819","affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063134490","display_name":"Feiqi Deng","orcid":"https://orcid.org/0000-0002-0257-5647"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feiqi Deng","raw_affiliation_strings":["South China University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0257-5647","affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054621636","display_name":"Jun Huang","orcid":"https://orcid.org/0000-0002-7706-7081"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Huang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7706-7081","affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5113132472"],"corresponding_institution_ids":["https://openalex.org/I45928872","https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":1.3245,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82321716,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1094","last_page":"1097"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8390954732894897},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.654078483581543},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6445714831352234},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6037906408309937},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.5378633141517639},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5231431722640991},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.470651775598526},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4547693729400635},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4405454099178314},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.41398006677627563},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07565730810165405}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8390954732894897},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.654078483581543},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6445714831352234},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6037906408309937},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.5378633141517639},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5231431722640991},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.470651775598526},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4547693729400635},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4405454099178314},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.41398006677627563},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07565730810165405},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3616855.3635690","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616855.3635690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6600000262260437}],"awards":[],"funders":[{"id":"https://openalex.org/F4320323059","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2150884987","https://openalex.org/W2160660844","https://openalex.org/W2163455955","https://openalex.org/W2251939518","https://openalex.org/W2884464324","https://openalex.org/W4385572787"],"related_works":["https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3098382480","https://openalex.org/W4287598411","https://openalex.org/W3094871513","https://openalex.org/W3100913109","https://openalex.org/W3198458223","https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W2964413124"],"abstract_inverted_index":{"Recently,":[0],"Large":[1],"Language":[2,16],"Models":[3],"(LLMs)":[4],"have":[5,157],"achieved":[6],"amazing":[7],"zero-shot":[8],"learning":[9],"performance":[10],"over":[11,133],"a":[12,50,127,134],"variety":[13],"of":[14,29,38,136],"Natural":[15],"Processing":[17],"(NLP)":[18],"tasks,":[19,79],"especially":[20],"for":[21,102,163],"text":[22],"generative":[23],"tasks.":[24],"Yet,":[25],"the":[26,34,55,74,91,99,103,108,122,146],"large":[27],"size":[28],"LLMs":[30],"often":[31],"leads":[32],"to":[33,67,82,98,106,120],"high":[35],"computational":[36,129],"cost":[37],"model":[39,105],"training":[40,115,148],"and":[41,76,110,145],"online":[42],"deployment.":[43],"In":[44],"our":[45,141],"work,":[46],"we":[47,89,156],"present":[48],"ALTER,":[49,88,155],"system":[51],"that":[52,140],"effectively":[53],"builds":[54],"multi-tAsk":[56],"Learners":[57],"with":[58],"mixTure-of-task-adaptERs":[59],"upon":[60],"small":[61,128],"language":[62,161],"models":[63,162],"(with":[64],"<1B":[65],"parameters)":[66],"address":[68],"multiple":[69],"NLP":[70,137],"tasks":[71,138],"simultaneously,":[72],"capturing":[73],"commonalities":[75],"differences":[77],"between":[78,124],"in":[80,87],"order":[81],"support":[83],"domain-specific":[84],"applications.":[85],"Specifically,":[86],"propose":[90],"Mixture-of-Task-Adapters":[92],"(MTA)":[93],"module":[94],"as":[95],"an":[96],"extension":[97],"transformer":[100],"architecture":[101,144],"underlying":[104],"capture":[107],"intra-task":[109],"inter-task":[111],"knowledge.":[112],"A":[113],"two-stage":[114,147],"method":[116,149],"is":[117],"further":[118],"proposed":[119,142],"optimize":[121],"collaboration":[123],"adapters":[125],"at":[126],"cost.":[130],"Experimental":[131],"results":[132],"mixture":[135],"show":[139],"MTA":[143],"achieve":[150],"good":[151],"performance.":[152],"Based":[153],"on":[154],"also":[158],"produced":[159],"MTA-equipped":[160],"various":[164],"domains.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
