{"id":"https://openalex.org/W4300973453","doi":"https://doi.org/10.48550/arxiv.2206.04046","title":"Sparse Mixture-of-Experts are Domain Generalizable Learners","display_name":"Sparse Mixture-of-Experts are Domain Generalizable Learners","publication_year":2022,"publication_date":"2022-06-08","ids":{"openalex":"https://openalex.org/W4300973453","doi":"https://doi.org/10.48550/arxiv.2206.04046"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2206.04046","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.04046","pdf_url":"https://arxiv.org/pdf/2206.04046","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2206.04046","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100374360","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-6709-0942"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114889635","display_name":"Yifei Shen","orcid":"https://orcid.org/0000-0001-7174-4793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Yifei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075948339","display_name":"Jingkang Yang","orcid":"https://orcid.org/0000-0002-9424-254X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jingkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049116845","display_name":"Yezhen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yezhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102406361","display_name":"Jiawei Ren","orcid":"https://orcid.org/0000-0003-1950-5976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Jiawei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009243342","display_name":"Tong Che","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433246","display_name":"Jun Zhang","orcid":"https://orcid.org/0000-0003-1017-7179"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100406050","display_name":"Ziwei Liu","orcid":"https://orcid.org/0000-0002-4220-5958"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100374360"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9174000024795532,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7720978260040283},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6342599391937256},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6186460852622986},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.605772852897644},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5927259922027588},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5578259825706482},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.47768765687942505},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.45845168828964233},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10436096787452698},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07802820205688477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7720978260040283},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6342599391937256},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6186460852622986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.605772852897644},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5927259922027588},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5578259825706482},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.47768765687942505},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.45845168828964233},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10436096787452698},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07802820205688477},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2206.04046","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.04046","pdf_url":"https://arxiv.org/pdf/2206.04046","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-127530","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-127530","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"},{"id":"doi:10.48550/arxiv.2206.04046","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2206.04046","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2206.04046","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.04046","pdf_url":"https://arxiv.org/pdf/2206.04046","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3125011624","https://openalex.org/W1508631387","https://openalex.org/W2370917603","https://openalex.org/W2952760143","https://openalex.org/W2017776670","https://openalex.org/W2347897961","https://openalex.org/W2340870721","https://openalex.org/W2358318464","https://openalex.org/W2979236518","https://openalex.org/W3091955004"],"abstract_inverted_index":{"Human":[0],"visual":[1,8],"perception":[2],"can":[3],"easily":[4],"generalize":[5],"to":[6,25,45,90,95,113,149],"out-of-distributed":[7],"data,":[9],"which":[10],"is":[11,58,147,156],"far":[12],"beyond":[13],"the":[14,36,51,54,104,107],"capability":[15],"of":[16,53],"modern":[17],"machine":[18],"learning":[19],"models.":[20],"Domain":[21],"generalization":[22],"(DG)":[23],"aims":[24],"close":[26],"this":[27,41],"gap,":[28],"with":[29,68,103,135,161],"existing":[30,150],"DG":[31,79,83,117,139,151,162],"methods":[32,152],"mainly":[33],"focusing":[34],"on":[35,81,129],"loss":[37],"function":[38],"design.":[39],"In":[40],"paper,":[42],"we":[43],"propose":[44,114],"explore":[46],"an":[47,61],"orthogonal":[48],"direction,":[49],"i.e.,":[50],"design":[52],"backbone":[55],"architecture.":[56],"It":[57],"motivated":[59],"by":[60,98,141],"empirical":[62,69],"finding":[63],"that":[64,132],"transformer-based":[65],"models":[66,75],"trained":[67,134,160],"risk":[70],"minimization":[71],"(ERM)":[72],"outperform":[73],"CNN-based":[74],"employing":[76],"state-of-the-art":[77],"(SOTA)":[78],"algorithms":[80],"multiple":[82],"datasets.":[84],"We":[85],"develop":[86],"a":[87,92,115,142],"formal":[88],"framework":[89],"characterize":[91],"network's":[93],"robustness":[94],"distribution":[96],"shifts":[97],"studying":[99],"its":[100,154],"architecture's":[101],"alignment":[102],"correlations":[105],"in":[106],"dataset.":[108],"This":[109],"analysis":[110],"guides":[111],"us":[112],"novel":[116],"model":[118],"built":[119],"upon":[120],"vision":[121],"transformers,":[122],"namely":[123],"Generalizable":[124],"Mixture-of-Experts":[125],"(GMoE).":[126],"Extensive":[127],"experiments":[128],"DomainBed":[130],"demonstrate":[131],"GMoE":[133,146],"ERM":[136],"outperforms":[137],"SOTA":[138],"baselines":[140],"large":[143],"margin.":[144],"Moreover,":[145],"complementary":[148],"and":[153],"performance":[155],"substantially":[157],"improved":[158],"when":[159],"algorithms.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
