{"id":"https://openalex.org/W4379659785","doi":"https://doi.org/10.1145/3580305.3599278","title":"COMET: Learning Cardinality Constrained Mixture of Experts with Trees and Local Search","display_name":"COMET: Learning Cardinality Constrained Mixture of Experts with Trees and Local Search","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4379659785","doi":"https://doi.org/10.1145/3580305.3599278"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599278","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599278","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112853348","display_name":"Shibal Ibrahim","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shibal Ibrahim","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100687327","display_name":"Wenyu Chen","orcid":"https://orcid.org/0009-0007-7952-7735"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenyu Chen","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084502311","display_name":"Hussein Hazimeh","orcid":"https://orcid.org/0000-0003-4501-0678"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hussein Hazimeh","raw_affiliation_strings":["Google Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102899387","display_name":"Natalia Ponomareva","orcid":"https://orcid.org/0009-0005-6761-1468"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Natalia Ponomareva","raw_affiliation_strings":["Google Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100631150","display_name":"Zhe Zhao","orcid":"https://orcid.org/0000-0002-6847-0186"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhe Zhao","raw_affiliation_strings":["Google DeepMind, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google DeepMind, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045271820","display_name":"Rahul Mazumder","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rahul Mazumder","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5112853348"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.2388,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.50096575,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"832","last_page":"844"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7180220484733582},{"id":"https://openalex.org/keywords/comet","display_name":"Comet","score":0.6330469846725464},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5250603556632996},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.48487910628318787},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.4531864523887634},{"id":"https://openalex.org/keywords/local-search","display_name":"Local search (optimization)","score":0.4293859004974365},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36949634552001953},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36699819564819336},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34359872341156006},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3266390562057495},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2135949730873108}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7180220484733582},{"id":"https://openalex.org/C94081185","wikidata":"https://www.wikidata.org/wiki/Q3559","display_name":"Comet","level":2,"score":0.6330469846725464},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5250603556632996},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.48487910628318787},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.4531864523887634},{"id":"https://openalex.org/C135320971","wikidata":"https://www.wikidata.org/wiki/Q1868524","display_name":"Local search (optimization)","level":2,"score":0.4293859004974365},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36949634552001953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36699819564819336},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34359872341156006},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3266390562057495},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2135949730873108},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3580305.3599278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599278","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2306.02824","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.02824","pdf_url":"https://arxiv.org/pdf/2306.02824","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dspace.mit.edu:1721.1/152039","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/152039","pdf_url":"https://dspace.mit.edu/bitstream/1721.1/152039/1/3580305.3599278.pdf","source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Association for Computing Machinery","raw_type":"http://purl.org/eprint/type/ConferencePaper"}],"best_oa_location":{"id":"doi:10.1145/3580305.3599278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599278","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4776870722","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G7833833757","display_name":null,"funder_award_id":"ONR-N000142112841","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4379659785.pdf","grobid_xml":"https://content.openalex.org/works/W4379659785.grobid-xml"},"referenced_works_count":67,"referenced_works":["https://openalex.org/W131533222","https://openalex.org/W1515620500","https://openalex.org/W1599016936","https://openalex.org/W1665115054","https://openalex.org/W1762430620","https://openalex.org/W1834627138","https://openalex.org/W2007339694","https://openalex.org/W2027628336","https://openalex.org/W2075183947","https://openalex.org/W2083346837","https://openalex.org/W2117354486","https://openalex.org/W2130158090","https://openalex.org/W2150884987","https://openalex.org/W2155912844","https://openalex.org/W2182396527","https://openalex.org/W2219888463","https://openalex.org/W2220384803","https://openalex.org/W2222512263","https://openalex.org/W2242818861","https://openalex.org/W2251939518","https://openalex.org/W2335728318","https://openalex.org/W2529099292","https://openalex.org/W2739351760","https://openalex.org/W2787434568","https://openalex.org/W2809290718","https://openalex.org/W2896457183","https://openalex.org/W2912083425","https://openalex.org/W2923014074","https://openalex.org/W2963323070","https://openalex.org/W2963393494","https://openalex.org/W2963703618","https://openalex.org/W2963748441","https://openalex.org/W2963846996","https://openalex.org/W2978670439","https://openalex.org/W2979775090","https://openalex.org/W2979826702","https://openalex.org/W3048443606","https://openalex.org/W3048822613","https://openalex.org/W3104033643","https://openalex.org/W3119866685","https://openalex.org/W3147874613","https://openalex.org/W3170796112","https://openalex.org/W3205972749","https://openalex.org/W3207645655","https://openalex.org/W4200634402","https://openalex.org/W4224296706","https://openalex.org/W4224903667","https://openalex.org/W4225390052","https://openalex.org/W4225905768","https://openalex.org/W4226079124","https://openalex.org/W4226153346","https://openalex.org/W4239390603","https://openalex.org/W4281612616","https://openalex.org/W4282028729","https://openalex.org/W4287121196","https://openalex.org/W4287124167","https://openalex.org/W4287391717","https://openalex.org/W4287866765","https://openalex.org/W4290876177","https://openalex.org/W4293718192","https://openalex.org/W4295116917","https://openalex.org/W4297821991","https://openalex.org/W4304697563","https://openalex.org/W4366084930","https://openalex.org/W4385567093","https://openalex.org/W4394650427","https://openalex.org/W6688871553"],"related_works":["https://openalex.org/W3049149308","https://openalex.org/W3036339064","https://openalex.org/W3132686465","https://openalex.org/W3080418168","https://openalex.org/W2061245293","https://openalex.org/W3031501629","https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W2985497073","https://openalex.org/W4252555497"],"abstract_inverted_index":{"The":[0],"sparse":[1,45,76,108,140],"Mixture-of-Experts":[2],"(Sparse-MoE)":[3],"framework":[4],"efficiently":[5],"scales":[6],"up":[7,94,190,206,227],"model":[8,249],"capacity":[9],"in":[10,137,194,231],"various":[11,167],"domains,":[12,168],"such":[13],"as":[14,257,259],"natural":[15,174],"language":[16,175,240],"processing":[17],"and":[18,51,96,147,173,180,203,205,217],"vision.":[19],"Sparse-MoEs":[20],"select":[21],"a":[22,29,40,74,82,126],"subset":[23],"of":[24,31,107],"the":[25,32,103,232,246],"\"experts\"":[26],"(thus,":[27],"only":[28],"portion":[30],"overall":[33],"network)":[34],"for":[35,235,239,250],"each":[36],"input":[37],"sample":[38],"using":[39],"sparse,":[41],"trainable":[42],"gate.":[43],"Existing":[44],"gates":[46,212,219],"are":[47,113,220],"prone":[48,115],"to":[49,67,92,102,116,191,207,228],"convergence":[50],"performance":[52],"issues":[53],"when":[54],"training":[55,138],"with":[56,121,186,222],"first-order":[57,111,135],"optimization":[58],"methods.":[59],"In":[60],"this":[61,122],"paper,":[62],"we":[63,72,124,225],"introduce":[64],"two":[65],"improvements":[66],"current":[68],"MoE":[69],"approaches.":[70],"First,":[71],"propose":[73,125],"new":[75],"gate:":[77],"COMET,":[78],"which":[79],"relies":[80],"on":[81,166,253],"novel":[83],"tree-based":[84],"mechanism.":[85],"COMET":[86],"is":[87],"differentiable,":[88],"can":[89,133,154],"exploit":[90],"sparsity":[91],"speed":[93],"computation,":[95],"outperforms":[97],"state-of-the-art":[98,247],"gates.":[99],"Second,":[100],"due":[101],"challenging":[104],"combinatorial":[105],"nature":[106],"expert":[109],"selection,":[110],"methods":[112,136],"typically":[114],"low-quality":[117],"solutions.":[118,161],"To":[119],"deal":[120],"challenge,":[123],"novel,":[127],"permutation-based":[128],"local":[129,152,187,223],"search":[130,153],"method":[131],"that":[132,151],"complement":[134],"any":[139],"gate,":[141],"e.g.,":[142,200,213],"Hash":[143,201,218],"routing,":[144],"Top-k,":[145,204],"DSelect-k,":[146],"COMET.":[148],"We":[149,162],"show":[150],"help":[155],"networks":[156],"escape":[157],"bad":[158],"initializations":[159],"or":[160],"performed":[163],"large-scale":[164],"experiments":[165],"including":[169],"recommender":[170,181],"systems,":[171],"vision,":[172],"processing.":[176],"On":[177],"standard":[178],"vision":[179],"systems":[182],"benchmarks,":[183],"COMET+":[184],"(COMET":[185],"search)":[188],"achieves":[189],"13%":[192],"improvement":[193],"ROC":[195],"AUC":[196],"over":[197,209,245],"popular":[198],"gates,":[199],"routing":[202],"9%":[208],"prior":[210],"differentiable":[211],"DSelect-k.":[214],"When":[215],"Top-k":[216],"combined":[221],"search,":[224],"see":[226],"100X":[229],"reduction":[230],"budget":[233],"needed":[234],"hyperparameter":[236],"tuning.":[237],"Moreover,":[238],"modeling,":[241],"our":[242],"approach":[243],"improves":[244],"MoEBERT":[248],"distilling":[251],"BERT":[252],"5/7":[254],"GLUE":[255],"benchmarks":[256],"well":[258],"SQuAD":[260],"dataset.":[261]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
