{"id":"https://openalex.org/W7105022518","doi":"https://doi.org/10.1109/tase.2025.3631883","title":"Multimodal Reinforcement Learning With Score-Based Policy","display_name":"Multimodal Reinforcement Learning With Score-Based Policy","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7105022518","doi":"https://doi.org/10.1109/tase.2025.3631883"},"language":null,"primary_location":{"id":"doi:10.1109/tase.2025.3631883","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3631883","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wenjun Zou","orcid":"https://orcid.org/0009-0009-5331-0938"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenjun Zou","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yinuo Wang","orcid":"https://orcid.org/0009-0003-6252-0618"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinuo Wang","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Liu","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bin Shuai","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Shuai","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Liming Xiao","orcid":"https://orcid.org/0009-0006-4566-1103"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liming Xiao","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yinsong Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yinsong Ma","raw_affiliation_strings":["Laboratory for Computational Sensing and Robotics, Johns Hopkins University, Baltimore, MD, USA"],"affiliations":[{"raw_affiliation_string":"Laboratory for Computational Sensing and Robotics, Johns Hopkins University, Baltimore, MD, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jingliang Duan","orcid":"https://orcid.org/0000-0002-3697-1576"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingliang Duan","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":null,"display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.76879731,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":null,"first_page":"24105","last_page":"24119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8392999768257141,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8392999768257141,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.012400000356137753,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.00930000003427267,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7595999836921692},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.5335999727249146},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5157999992370605},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4970000088214874},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.42739999294281006},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.41780000925064087},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.41190001368522644}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7595999836921692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7297000288963318},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6643000245094299},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5809000134468079},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.5335999727249146},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5157999992370605},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4970000088214874},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.42739999294281006},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.41780000925064087},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.41190001368522644},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C192576344","wikidata":"https://www.wikidata.org/wiki/Q194706","display_name":"Boltzmann machine","level":3,"score":0.2700999975204468},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tase.2025.3631883","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2025.3631883","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3945676288","display_name":null,"funder_award_id":"52202487","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4443787319","display_name":null,"funder_award_id":"2024YFB2505500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2002664623","https://openalex.org/W2099867508","https://openalex.org/W2116064496","https://openalex.org/W2158782408","https://openalex.org/W2746553466","https://openalex.org/W2903253065","https://openalex.org/W2904246096","https://openalex.org/W2968029133","https://openalex.org/W3015082424","https://openalex.org/W4214717370","https://openalex.org/W4353056919","https://openalex.org/W4362650413","https://openalex.org/W4366158867","https://openalex.org/W4386285856","https://openalex.org/W4389065366","https://openalex.org/W4389879653","https://openalex.org/W4391640441","https://openalex.org/W4393144926","https://openalex.org/W4394006698","https://openalex.org/W4401415792","https://openalex.org/W4403337227","https://openalex.org/W4406983280","https://openalex.org/W4415796419","https://openalex.org/W4415797600","https://openalex.org/W4415797647"],"related_works":[],"abstract_inverted_index":{"Learning":[0],"multimodal":[1,177,191],"policies":[2,27],"is":[3,199],"crucial":[4],"for":[5],"enhancing":[6],"exploration":[7],"in":[8,14,37,84,160],"online":[9,38],"reinforcement":[10],"learning":[11],"(RL),":[12],"especially":[13],"tasks":[15],"with":[16,58,64],"continuous":[17],"action":[18,55,68,106,122],"spaces":[19],"and":[20,52,108,136,143,167,184],"non-convex":[21],"reward":[22],"landscapes.":[23],"While":[24],"recent":[25],"diffusion":[26],"show":[28],"promise,":[29],"they":[30],"often":[31],"suffer":[32],"from":[33,93],"low":[34],"computational":[35,186],"efficiency":[36,187],"settings.":[39],"A":[40],"more":[41],"training-efficient":[42],"paradigm":[43],"involves":[44],"modeling":[45],"the":[46,54,59,62,67,71,75,101,112,117,140,161],"policy":[47],"as":[48,78],"a":[49],"Boltzmann":[50],"distribution":[51],"guiding":[53],"sampling":[56,97,113,141],"directly":[57],"gradient":[60],"of":[61,74,105,119,196],"Q-value":[63],"respect":[65],"to":[66,70,138,146,189],"(proportional":[69],"score":[72,134],"function":[73],"policy),":[76],"such":[77],"via":[79],"Langevin":[80],"dynamics.":[81],"However,":[82],"analysis":[83],"this":[85,89,128,197],"paper":[86,129,198],"reveals":[87],"that":[88,171],"gradient-guided":[90],"approach":[91],"suffers":[92],"two":[94],"critical":[95],"challenges:":[96],"instability":[98],"caused":[99],"by":[100],"widely":[102],"varying":[103],"magnitude":[104],"gradients;":[107],"mode":[109,148],"imbalance,":[110],"where":[111],"process":[114],"inaccurately":[115],"represents":[116],"weights":[118],"different":[120],"high-value":[121],"modes.":[123],"To":[124],"address":[125],"these":[126],"challenges,":[127],"introduces":[130],"three":[131],"targeted":[132],"techniques:":[133],"normalization":[135],"reshaping":[137],"stabilize":[139],"process,":[142],"value-based":[144],"resampling":[145],"correct":[147],"imbalance.":[149],"These":[150],"techniques":[151],"are":[152],"then":[153],"integrated":[154],"into":[155],"an":[156],"actor-critic":[157],"framework,":[158],"resulting":[159],"Score-Enhanced":[162],"Actor-Critic":[163],"(SEAC)":[164],"algorithm.":[165],"Simulation":[166],"real-world":[168],"experiments":[169],"demonstrate":[170],"SEAC":[172],"not":[173],"only":[174],"effectively":[175],"learns":[176],"behaviors":[178],"but":[179],"also":[180],"achieves":[181],"state-of-the-art":[182],"performance":[183],"high":[185],"compared":[188],"prior":[190],"RL":[192],"methods.":[193],"The":[194],"code":[195],"available":[200],"at":[201],"https://github.com/THUzouwenjun/SEAC.":[202]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-12T00:00:00"}
