{"id":"https://openalex.org/W4416037042","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.275","title":"CLIP-MoE: Towards Building Mixture of Experts for CLIP with Diversified Multiplet Upcycling","display_name":"CLIP-MoE: Towards Building Mixture of Experts for CLIP with Diversified Multiplet Upcycling","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037042","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.275"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.275","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.275","pdf_url":"https://aclanthology.org/2025.emnlp-main.275.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.275.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101751726","display_name":"Jihai Zhang","orcid":"https://orcid.org/0000-0003-4242-5341"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jihai Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065871371","display_name":"Xiaoye Qu","orcid":"https://orcid.org/0000-0002-4907-3978"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoye Qu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075497696","display_name":"Tong Zhu","orcid":"https://orcid.org/0000-0002-2488-6366"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong Zhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101653971","display_name":"Yu Cheng","orcid":"https://orcid.org/0000-0002-7121-4036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Cheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101751726"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.837,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91344854,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5406","last_page":"5419"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.541100025177002,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.541100025177002,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.22139999270439148,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.025800000876188278,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multiplet","display_name":"Multiplet","score":0.4828999936580658},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.2921999990940094},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.27810001373291016},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.27399998903274536},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2736000120639801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5260000228881836},{"id":"https://openalex.org/C133871045","wikidata":"https://www.wikidata.org/wiki/Q4598301","display_name":"Multiplet","level":3,"score":0.4828999936580658},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3521000146865845},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2858000099658966},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2623000144958496},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.275","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.275","pdf_url":"https://aclanthology.org/2025.emnlp-main.275.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.275","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.275","pdf_url":"https://aclanthology.org/2025.emnlp-main.275.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037042.pdf","grobid_xml":"https://content.openalex.org/works/W4416037042.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Contrastive":[0],"Language-Image":[1],"Pre-training":[2],"(CLIP)":[3],"has":[4],"become":[5],"a":[6,39,44,54,58,76,83,111,116,157],"cornerstone":[7],"in":[8],"multimodal":[9],"intelligence.However,":[10],"recent":[11],"studies":[12],"discovered":[13],"that":[14,42],"CLIP":[15,48,70,78,119],"can":[16],"only":[17],"encode":[18],"one":[19],"aspect":[20],"of":[21,46,67,118,136],"the":[22,133],"feature":[23,95],"space,":[24],"leading":[25],"to":[26],"substantial":[27],"information":[28],"loss":[29],"and":[30,50,79,128,146],"indistinctive":[31],"features.To":[32],"mitigate":[33],"this":[34,36],"issue,":[35],"paper":[37],"introduces":[38],"novel":[40],"strategy":[41],"fine-tunes":[43,80],"series":[45],"complementary":[47],"models":[49,71,102],"transforms":[51],"them":[52,109],"into":[53,82,110],"CLIP-MoE.Specifically,":[55],"we":[56,107],"propose":[57],"model-agnostic":[59],"Diversified":[60],"Multiplet":[61],"Upcycling":[62],"(DMU)":[63],"framework":[64],"for":[65],"CLIP.Instead":[66],"training":[68],"multiple":[69],"from":[72],"scratch,":[73],"DMU":[74],"leverages":[75],"pre-trained":[77],"it":[81],"diverse":[84],"set":[85],"with":[86],"highly":[87],"cost-effective":[88],"multistage":[89],"contrastive":[90],"learning,":[91],"thus":[92],"capturing":[93],"distinct":[94],"subspaces":[96],"efficiently.To":[97],"fully":[98],"exploit":[99],"these":[100],"fine-tuned":[101],"while":[103],"minimizing":[104],"computational":[105,129],"overhead,":[106],"transform":[108],"CLIP-MoE,":[112],"which":[113],"dynamically":[114],"activates":[115],"subset":[117],"experts,":[120],"achieving":[121],"an":[122],"effective":[123],"balance":[124],"between":[125],"model":[126],"capacity":[127],"cost.Comprehensive":[130],"experiments":[131],"demonstrate":[132],"superior":[134],"performance":[135],"CLIP-MoE":[137],"across":[138],"various":[139],"zero-shot":[140,142],"retrieval,":[141],"image":[143],"classification":[144],"tasks,":[145],"downstream":[147],"Multimodal":[148],"Large":[149],"Language":[150],"Model":[151],"(MLLM)":[152],"benchmarks":[153],"when":[154],"used":[155],"as":[156],"vision":[158],"encoder.Code":[159],"is":[160],"available":[161],"at":[162],"https:":[163],"//github.com/OpenSparseLLMs/CLIP-MoE.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-13T14:20:09.374765","created_date":"2025-11-08T00:00:00"}
