{"id":"https://openalex.org/W7131383980","doi":"https://doi.org/10.48550/arxiv.2602.19244","title":"Robust Exploration in Directed Controller Synthesis via Reinforcement Learning with Soft Mixture-of-Experts","display_name":"Robust Exploration in Directed Controller Synthesis via Reinforcement Learning with Soft Mixture-of-Experts","publication_year":2026,"publication_date":"2026-02-22","ids":{"openalex":"https://openalex.org/W7131383980","doi":"https://doi.org/10.48550/arxiv.2602.19244"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19244","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126630618","display_name":"Toshihide UBUKATA","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ubukata, Toshihide","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126715847","display_name":"Zhiyao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhiyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125753699","display_name":"Enhong MU","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mu, Enhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126833294","display_name":"Jialong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jialong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5045332896","display_name":"Kenji Tei","orcid":"https://orcid.org/0000-0003-1106-1709"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tei, Kenji","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126630618"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11489","display_name":"Air Traffic Management and Optimization","score":0.7293999791145325,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11489","display_name":"Air Traffic Management and Optimization","score":0.7293999791145325,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.08380000293254852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.04050000011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7229999899864197},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6873999834060669},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5601999759674072},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48410001397132874},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.4487000107765198},{"id":"https://openalex.org/keywords/robust-control","display_name":"Robust control","score":0.383899986743927},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.3522000014781952},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.34709998965263367}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7229999899864197},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6873999834060669},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6315000057220459},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5601999759674072},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48410001397132874},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4487000107765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3903999924659729},{"id":"https://openalex.org/C31531917","wikidata":"https://www.wikidata.org/wiki/Q915157","display_name":"Robust control","level":3,"score":0.383899986743927},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.3522000014781952},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.34709998965263367},{"id":"https://openalex.org/C92991967","wikidata":"https://www.wikidata.org/wiki/Q7644329","display_name":"Supervisory control","level":3,"score":0.3230000138282776},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.30730000138282776},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2736000120639801},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C73586568","wikidata":"https://www.wikidata.org/wiki/Q2600211","display_name":"Parameter space","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"On-the-fly":[0],"Directed":[1],"Controller":[2],"Synthesis":[3],"(OTF-DCS)":[4],"mitigates":[5],"state-space":[6],"explosion":[7],"by":[8],"incrementally":[9],"exploring":[10],"the":[11,65,109,118],"system":[12],"and":[13,32,76,98,122],"relies":[14],"critically":[15],"on":[16,108],"an":[17,53],"exploration":[18],"policy":[19,55],"to":[20,41,73,126],"guide":[21],"search":[22],"efficiently.":[23],"Recent":[24],"reinforcement":[25],"learning":[26],"(RL)":[27],"approaches":[28],"learn":[29],"such":[30],"policies":[31],"achieve":[33],"promising":[34],"zero-shot":[35],"generalization":[36],"from":[37],"small":[38],"training":[39,74],"instances":[40],"larger":[42],"unseen":[43],"ones.":[44],"However,":[45],"a":[46,61,84,94],"fundamental":[47],"limitation":[48],"is":[49],"anisotropic":[50,101],"generalization,":[51],"where":[52],"RL":[54,91],"exhibits":[56],"strong":[57],"performance":[58],"only":[59],"in":[60],"specific":[62],"region":[63],"of":[64],"domain-parameter":[66],"space":[67,121],"while":[68],"remaining":[69],"fragile":[70],"elsewhere":[71],"due":[72],"stochasticity":[75],"trajectory-dependent":[77],"bias.":[78],"To":[79],"address":[80],"this,":[81],"we":[82],"propose":[83],"Soft":[85],"Mixture-of-Experts":[86],"framework":[87],"that":[88,114],"combines":[89],"multiple":[90],"experts":[92],"via":[93],"prior-confidence":[95],"gating":[96],"mechanism":[97],"treats":[99],"these":[100],"behaviors":[102],"as":[103],"complementary":[104],"specializations.":[105],"The":[106],"evaluation":[107],"Air":[110],"Traffic":[111],"benchmark":[112],"shows":[113],"Soft-MoE":[115],"substantially":[116],"expands":[117],"solvable":[119],"parameter":[120],"improves":[123],"robustness":[124],"compared":[125],"any":[127],"single":[128],"expert.":[129]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
