{"id":"https://openalex.org/W4404194808","doi":"https://doi.org/10.1017/s0263574724000626","title":"Robot imitation from multimodal observation with unsupervised cross-modal representation","display_name":"Robot imitation from multimodal observation with unsupervised cross-modal representation","publication_year":2024,"publication_date":"2024-10-01","ids":{"openalex":"https://openalex.org/W4404194808","doi":"https://doi.org/10.1017/s0263574724000626"},"language":"en","primary_location":{"id":"doi:10.1017/s0263574724000626","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s0263574724000626","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/6B8AA9AC5470D433625BB7F795F091D6/S0263574724000626a.pdf/div-class-title-robot-imitation-from-multimodal-observation-with-unsupervised-cross-modal-representation-div.pdf","source":{"id":"https://openalex.org/S92163612","display_name":"Robotica","issn_l":"0263-5747","issn":["0263-5747","1469-8668"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotica","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/6B8AA9AC5470D433625BB7F795F091D6/S0263574724000626a.pdf/div-class-title-robot-imitation-from-multimodal-observation-with-unsupervised-cross-modal-representation-div.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005456562","display_name":"Xuanhui Xu","orcid":"https://orcid.org/0000-0003-0394-8713"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanhui Xu","raw_affiliation_strings":["College of Electronic and Information Engineering, Tongji University, ShangHai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010299064","display_name":"Mingyu You","orcid":"https://orcid.org/0000-0003-2758-167X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingyu You","raw_affiliation_strings":["College of Electronic and Information Engineering, Tongji University, ShangHai, China","National Key Laboratory of Autonomous Intelligent Unmanned Systems, Frontiers Science Center for Intelligent Autonomous Systems, Ministry of Education, Tongji University, ShangHai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"National Key Laboratory of Autonomous Intelligent Unmanned Systems, Frontiers Science Center for Intelligent Autonomous Systems, Ministry of Education, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101789136","display_name":"Hongjun Zhou","orcid":"https://orcid.org/0000-0002-6256-2485"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjun Zhou","raw_affiliation_strings":["College of Electronic and Information Engineering, Tongji University, ShangHai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049557511","display_name":"Bin He","orcid":"https://orcid.org/0000-0003-3193-6269"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin He","raw_affiliation_strings":["College of Electronic and Information Engineering, Tongji University, ShangHai, China","National Key Laboratory of Autonomous Intelligent Unmanned Systems, Frontiers Science Center for Intelligent Autonomous Systems, Ministry of Education, Tongji University, ShangHai, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"National Key Laboratory of Autonomous Intelligent Unmanned Systems, Frontiers Science Center for Intelligent Autonomous Systems, Ministry of Education, Tongji University, ShangHai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010299064"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23771038,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"42","issue":"10","first_page":"3247","last_page":"3262"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6744509935379028},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6487691402435303},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.632976770401001},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5977984666824341},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5876857042312622},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5751760601997375},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4764438271522522},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14866122603416443}],"concepts":[{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6744509935379028},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6487691402435303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.632976770401001},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5977984666824341},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5876857042312622},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5751760601997375},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4764438271522522},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14866122603416443},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0263574724000626","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s0263574724000626","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/6B8AA9AC5470D433625BB7F795F091D6/S0263574724000626a.pdf/div-class-title-robot-imitation-from-multimodal-observation-with-unsupervised-cross-modal-representation-div.pdf","source":{"id":"https://openalex.org/S92163612","display_name":"Robotica","issn_l":"0263-5747","issn":["0263-5747","1469-8668"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotica","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1017/s0263574724000626","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s0263574724000626","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/6B8AA9AC5470D433625BB7F795F091D6/S0263574724000626a.pdf/div-class-title-robot-imitation-from-multimodal-observation-with-unsupervised-cross-modal-representation-div.pdf","source":{"id":"https://openalex.org/S92163612","display_name":"Robotica","issn_l":"0263-5747","issn":["0263-5747","1469-8668"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotica","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404194808.pdf","grobid_xml":"https://content.openalex.org/works/W4404194808.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W2187089797","https://openalex.org/W2521757446","https://openalex.org/W2769112066","https://openalex.org/W2884247313","https://openalex.org/W2899405112","https://openalex.org/W2903181768","https://openalex.org/W2904455790","https://openalex.org/W2952165569","https://openalex.org/W2962787969","https://openalex.org/W2963802910","https://openalex.org/W2979863294","https://openalex.org/W3004917840","https://openalex.org/W3006904299","https://openalex.org/W3082900443","https://openalex.org/W3096831136","https://openalex.org/W3112664346","https://openalex.org/W3129140312","https://openalex.org/W3135705943","https://openalex.org/W3160314846","https://openalex.org/W3163015603","https://openalex.org/W3168133273","https://openalex.org/W3171007011","https://openalex.org/W3188220908","https://openalex.org/W3195968524","https://openalex.org/W3205656681","https://openalex.org/W3206485410","https://openalex.org/W4294936113","https://openalex.org/W4391225493","https://openalex.org/W4391748725","https://openalex.org/W6757470271","https://openalex.org/W6764169831"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2665305151"],"abstract_inverted_index":{"Abstract":[0],"Imitation":[1],"from":[2,11,106],"Observation":[3],"(IfO)":[4],"prompts":[5],"the":[6,21,53,70,103,124,162,170],"robot":[7,95,172,184],"to":[8,28,52,61,68,113,141],"imitate":[9],"tasks":[10],"unlabeled":[12],"videos":[13],"via":[14,97,145],"reinforcement":[15],"learning":[16],"(RL).":[17],"The":[18,137,151,183],"performance":[19],"of":[20,55],"IfO":[22,37,96],"algorithm":[23],"depends":[24],"on":[25,169],"its":[26],"ability":[27],"extract":[29,39,142],"task-relevant":[30,65,143],"representations":[31,41,144],"since":[32],"images":[33],"are":[34,83],"informative.":[35],"Existing":[36],"algorithms":[38],"image":[40],"by":[42,76],"using":[43],"a":[44,63,77,91,131,146],"simple":[45,71],"encoding":[46,72],"network":[47,79],"or":[48],"pre-trained":[49,78],"network.":[50,73],"Due":[51],"lack":[54],"action":[56],"labels,":[57],"it":[58],"is":[59],"challenging":[60],"design":[62,114],"supervised":[64],"proxy":[66,117,149],"task":[67],"train":[69],"Representations":[74],"extracted":[75],"such":[80],"as":[81],"Resnet":[82],"often":[84],"task-irrelevant.":[85],"In":[86],"this":[87],"article,":[88],"we":[89],"propose":[90],"new":[92],"approach":[93,120,168],"for":[94,153],"multimodal":[98,147],"observations.":[99],"Different":[100],"modalities":[101],"describe":[102],"same":[104],"information":[105],"different":[107],"sides,":[108],"which":[109],"can":[110],"be":[111],"used":[112],"an":[115],"unsupervised":[116,125,148],"task.":[118,150,182],"Our":[119],"contains":[121],"two":[122],"modules:":[123],"cross-modal":[126],"representation":[127],"(UCMR)":[128],"module":[129,139],"and":[130,179],"self-behavioral":[132],"cloning":[133],"(self-BC)-based":[134],"RL":[135,163],"module.":[136],"UCMR":[138],"learns":[140],"Self-BC":[152],"further":[154],"offline":[155],"policy":[156],"optimization":[157],"collects":[158],"successful":[159],"experiences":[160],"during":[161],"training.":[164],"We":[165],"evaluate":[166],"our":[167],"real":[171],"pouring":[173,177,180],"water":[174],"task,":[175,178],"quantitative":[176],"sand":[181],"achieves":[185],"state-of-the-art":[186],"performance.":[187]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
