{"id":"https://openalex.org/W7127425810","doi":"https://doi.org/10.48550/arxiv.2602.00939","title":"Improving Minimax Estimation Rates for Contaminated Mixture of Multinomial Logistic Experts via Expert Heterogeneity","display_name":"Improving Minimax Estimation Rates for Contaminated Mixture of Multinomial Logistic Experts via Expert Heterogeneity","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7127425810","doi":"https://doi.org/10.48550/arxiv.2602.00939"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.00939","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054211610","display_name":"Fanqi Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yan, Fanqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124939757","display_name":"Dung T Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Dung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124914766","display_name":"Trang Pham","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pham, Trang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124909777","display_name":"Huy Ph\u01b0\u01a1ng Nguy\u1ec5n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Huy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124968873","display_name":"Nhat Ho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Nhat","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5054211610"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.164900004863739,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.164900004863739,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.1582999974489212,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.1306000053882599,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/minimax","display_name":"Minimax","score":0.7330999970436096},{"id":"https://openalex.org/keywords/multinomial-logistic-regression","display_name":"Multinomial logistic regression","score":0.5479000210762024},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.5181000232696533},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.49559998512268066},{"id":"https://openalex.org/keywords/multinomial-distribution","display_name":"Multinomial distribution","score":0.4674000144004822},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.43799999356269836},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.40310001373291016}],"concepts":[{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.7330999970436096},{"id":"https://openalex.org/C117568660","wikidata":"https://www.wikidata.org/wiki/Q1650843","display_name":"Multinomial logistic regression","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.5181000232696533},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.49559998512268066},{"id":"https://openalex.org/C192065140","wikidata":"https://www.wikidata.org/wiki/Q1147928","display_name":"Multinomial distribution","level":2,"score":0.4674000144004822},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4528999924659729},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.43799999356269836},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4296000003814697},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.4189000129699707},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4081000089645386},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.38339999318122864},{"id":"https://openalex.org/C2777984123","wikidata":"https://www.wikidata.org/wiki/Q9248237","display_name":"Pointwise","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C167928553","wikidata":"https://www.wikidata.org/wiki/Q1376021","display_name":"Estimation theory","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C41426520","wikidata":"https://www.wikidata.org/wiki/Q1192065","display_name":"Point estimation","level":2,"score":0.3352999985218048},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33309999108314514},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2653000056743622}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.00939","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.00939","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.00939","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.00939","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Contaminated":[0],"mixture":[1,122],"of":[2,46,100,119,123,181],"experts":[3,126],"(MoE)":[4],"is":[5,20,195],"motivated":[6],"by":[7,110],"transfer":[8],"learning":[9],"methods":[10],"where":[11,147],"a":[12,17,28,35,120],"pre-trained":[13],"model,":[14,25,51],"acting":[15],"as":[16,27],"frozen":[18],"expert,":[19,30],"integrated":[21],"with":[22,127,151],"an":[23,175],"adapter":[24],"functioning":[26],"trainable":[29],"in":[31,49,58,70,77],"order":[32],"to":[33,41,163],"learn":[34],"new":[36],"task.":[37],"Despite":[38],"recent":[39],"efforts":[40],"analyze":[42],"the":[43,59,62,113,116,152,179],"convergence":[44,92,117,139],"behavior":[45],"parameter":[47,95,190],"estimation":[48,96,191],"this":[50,104],"there":[52],"are":[53,168],"still":[54],"two":[55],"unresolved":[56],"problems":[57],"literature.":[60],"First,":[61],"contaminated":[63,121,182],"MoE":[64,86],"model":[65],"has":[66],"been":[67],"studied":[68],"solely":[69],"regression":[71],"settings,":[72],"while":[73],"its":[74],"theoretical":[75],"foundation":[76],"classification":[78,89],"settings":[79,146],"remains":[80],"absent.":[81],"Second,":[82],"previous":[83],"works":[84],"on":[85],"models":[87],"for":[88,94,112,141],"capture":[90],"pointwise":[91],"rates":[93,140,167,192],"without":[97],"any":[98],"guaranty":[99],"minimax":[101,160,169],"optimality.":[102],"In":[103,133],"work,":[105],"we":[106,136,156],"close":[107],"these":[108,166],"gaps":[109],"performing,":[111],"first":[114],"time,":[115],"analysis":[118],"multinomial":[124],"logistic":[125],"homogeneous":[128],"and":[129],"heterogeneous":[130],"structures,":[131],"respectively.":[132],"each":[134],"regime,":[135],"characterize":[137],"uniform":[138],"estimating":[142],"parameters":[143,149],"under":[144],"challenging":[145],"ground-truth":[148],"vary":[150],"sample":[153],"size.":[154],"Furthermore,":[155],"also":[157],"establish":[158],"corresponding":[159],"lower":[161],"bounds":[162],"ensure":[164],"that":[165,184],"optimal.":[170],"Notably,":[171],"our":[172],"theories":[173],"offer":[174],"important":[176],"insight":[177],"into":[178],"design":[180],"MoE,":[183],"is,":[185],"expert":[186,199],"heterogeneity":[187],"yields":[188],"faster":[189],"and,":[193],"therefore,":[194],"more":[196],"sample-efficient":[197],"than":[198],"homogeneity.":[200]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-04T00:00:00"}
