{"id":"https://openalex.org/W3205091198","doi":"https://doi.org/10.1109/icra48506.2021.9561187","title":"Multi-Modal Mutual Information (MuMMI) Training for Robust Self-Supervised Deep Reinforcement Learning","display_name":"Multi-Modal Mutual Information (MuMMI) Training for Robust Self-Supervised Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3205091198","doi":"https://doi.org/10.1109/icra48506.2021.9561187","mag":"3205091198"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101687630","display_name":"Kaiqi Chen","orcid":"https://orcid.org/0000-0001-9635-5698"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kaiqi Chen","raw_affiliation_strings":["Dept. of Computer Science, National University of Singapore"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100627849","display_name":"Yong Lee","orcid":"https://orcid.org/0000-0003-2945-7322"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yong Lee","raw_affiliation_strings":["Dept. of Computer Science, National University of Singapore"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066073375","display_name":"Harold Soh","orcid":"https://orcid.org/0000-0002-3278-0035"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Harold Soh","raw_affiliation_strings":["Dept. of Computer Science, National University of Singapore"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101687630"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":2.0395,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.89150834,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4274","last_page":"4280"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7719442248344421},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7151457071304321},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6617098450660706},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6242031455039978},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6123270988464355},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5719244480133057},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5699512958526611},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.5470890998840332},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5252349376678467},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5001974105834961},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4552033543586731},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.44864094257354736},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.43978822231292725},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.41306185722351074},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12369778752326965},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10495087504386902}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7719442248344421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7151457071304321},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6617098450660706},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6242031455039978},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6123270988464355},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5719244480133057},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5699512958526611},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.5470890998840332},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5252349376678467},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5001974105834961},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4552033543586731},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.44864094257354736},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.43978822231292725},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.41306185722351074},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12369778752326965},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10495087504386902},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra48506.2021.9561187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334971","display_name":"Science and Engineering Research Council","ror":"https://ror.org/00zgdb249"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1902387477","https://openalex.org/W2011418219","https://openalex.org/W2105594594","https://openalex.org/W2105934661","https://openalex.org/W2116064496","https://openalex.org/W2117539524","https://openalex.org/W2168359464","https://openalex.org/W2336416123","https://openalex.org/W2786541991","https://openalex.org/W2842511635","https://openalex.org/W2900152462","https://openalex.org/W2904246096","https://openalex.org/W2963198154","https://openalex.org/W2968095426","https://openalex.org/W2980999634","https://openalex.org/W2995298643","https://openalex.org/W2998028256","https://openalex.org/W3012366945","https://openalex.org/W3036619998","https://openalex.org/W3040099822","https://openalex.org/W3047079303","https://openalex.org/W3088304681","https://openalex.org/W3205091198","https://openalex.org/W4289294484","https://openalex.org/W4297808394","https://openalex.org/W6676012828","https://openalex.org/W6703161083","https://openalex.org/W6748455135","https://openalex.org/W6756256016","https://openalex.org/W6757592117","https://openalex.org/W6771217966","https://openalex.org/W6781981342","https://openalex.org/W6782766965","https://openalex.org/W6844194202","https://openalex.org/W6910476156"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"This":[0],"work":[1],"focuses":[2],"on":[3,35,40,91],"learning":[4,111],"useful":[5],"and":[6,38,96],"robust":[7],"deep":[8,51,109],"world":[9],"models":[10],"using":[11,56],"multiple,":[12],"possibly":[13],"unreliable,":[14],"sensors.":[15,42],"We":[16,80],"find":[17],"that":[18,70],"current":[19],"methods":[20],"do":[21],"not":[22],"sufficiently":[23],"encourage":[24],"a":[25,44,48,57,65,88,97],"shared":[26],"representation":[27],"between":[28,73],"modalities;":[29],"this":[30],"can":[31],"cause":[32],"poor":[33],"performance":[34],"downstream":[36],"tasks":[37],"over-reliance":[39],"specific":[41],"As":[43],"solution,":[45],"we":[46],"contribute":[47],"new":[49],"multi-modal":[50,92],"latent":[52,75],"state-space":[53],"model,":[54],"trained":[55],"mutual":[58],"information":[59],"lower-bound.":[60],"The":[61],"key":[62],"innovation":[63],"is":[64],"specially-designed":[66],"density":[67],"ratio":[68],"estimator":[69],"encourages":[71],"consistency":[72],"the":[74,115],"codes":[76],"of":[77,117],"each":[78],"modality.":[79],"tasked":[81],"our":[82,104],"method":[83,105],"to":[84],"learn":[85],"policies":[86],"(in":[87],"self-supervised":[89],"manner)":[90],"Natural":[93],"MuJoCo":[94],"benchmarks":[95],"challenging":[98],"Table":[99],"Wiping":[100],"task.":[101],"Experiments":[102],"show":[103],"significantly":[106],"outperforms":[107],"state-of-the-art":[108],"reinforcement":[110],"methods,":[112],"particularly":[113],"in":[114],"presence":[116],"missing":[118],"observations.":[119]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
