{"id":"https://openalex.org/W4401907739","doi":"https://doi.org/10.1109/icdl61372.2024.10644164","title":"Robot Motion Learning via Visual Imitation and Language-Conditioned Reward","display_name":"Robot Motion Learning via Visual Imitation and Language-Conditioned Reward","publication_year":2024,"publication_date":"2024-05-20","ids":{"openalex":"https://openalex.org/W4401907739","doi":"https://doi.org/10.1109/icdl61372.2024.10644164"},"language":"en","primary_location":{"id":"doi:10.1109/icdl61372.2024.10644164","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icdl61372.2024.10644164","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113350652","display_name":"Masaya Nakano","orcid":"https://orcid.org/0009-0009-5407-0016"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Masaya Nakano","raw_affiliation_strings":["The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054151238","display_name":"Masatoshi Nagano","orcid":null},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masatoshi Nagano","raw_affiliation_strings":["The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073741458","display_name":"Tomoaki Nakamura","orcid":"https://orcid.org/0000-0002-3183-4599"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoaki Nakamura","raw_affiliation_strings":["The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Electro-Communications,Department of Mechanical and Intelligence Systems Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I20529979"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5113350652"],"corresponding_institution_ids":["https://openalex.org/I20529979"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13486518,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7161856293678284},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6317218542098999},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5841382145881653},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5694925785064697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5470810532569885},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4783979654312134},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.41694965958595276},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3536193370819092},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2564428448677063},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.11482518911361694}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7161856293678284},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6317218542098999},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5841382145881653},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5694925785064697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5470810532569885},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4783979654312134},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.41694965958595276},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3536193370819092},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2564428448677063},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.11482518911361694}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdl61372.2024.10644164","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icdl61372.2024.10644164","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1971378412","https://openalex.org/W2044847039","https://openalex.org/W2071533946","https://openalex.org/W2098516422","https://openalex.org/W2147800946","https://openalex.org/W2618530766","https://openalex.org/W2962793481","https://openalex.org/W2963767194","https://openalex.org/W3207488890","https://openalex.org/W4288020136","https://openalex.org/W6631190155","https://openalex.org/W6766848740","https://openalex.org/W6791353385"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2935909890","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W1531601525","https://openalex.org/W2665305151"],"abstract_inverted_index":{"In":[0,203],"recent":[1],"years,":[2],"there":[3],"has":[4,33],"been":[5,34],"an":[6,110],"increase":[7],"in":[8,17,29,69],"the":[9,53,57,60,64,91,136,142,146,150,155,158,164,168,185,204,206,215,218,230,237,242,249,253,262],"use":[10],"of":[11,56,63,73,93,102,107,141,163,217,229,264,267],"robots":[12,22,40],"that":[13,75,105,189,248,260],"coexist":[14],"with":[15],"humans":[16],"society":[18],"and":[19,66,80,96,115,120,128,145,149,160,195,220,270],"homes.":[20],"These":[21],"are":[23,84,197],"required":[24],"to":[25,41,78,199,256,273],"perform":[26,257],"various":[27,44],"motions":[28,196,211],"such":[30],"environments.":[31],"Research":[32],"conducted":[35],"on":[36,184],"imitation":[37,48,103,111,258],"learning":[38,49,104,213],"for":[39,71,86],"easily":[42,191],"learn":[43],"motions.":[45,244],"However,":[46],"conventional":[47],"methods":[50],"require":[51],"converting":[52],"motion":[54],"information":[55],"instructor":[58],"into":[59],"joint":[61,161],"angles":[62,162],"robot":[65,81,147,207,221,255],"preparing":[67],"data":[68,87,94,216],"advance":[70],"pairs":[72],"images":[74,140],"uniquely":[76],"correspond":[77],"human-motion":[79],"images,":[82],"which":[83,178,223],"costly":[85],"collection.":[88],"To":[89],"reduce":[90],"cost":[92],"collection":[95],"learning,":[97],"we":[98],"propose":[99],"a":[100,116,121,125,175,180],"method":[101,232,251],"consists":[106],"two":[108],"stages:":[109],"process":[112,123],"using":[113,124,174,193],"CycleGAN":[114,134],"convolutional":[117,151,169],"neural":[118,152,170],"network,":[119],"correction":[122],"policy":[126,176],"gradient":[127],"contrastive":[129,186],"language\u2013image":[130,187],"pretraining.":[131],"During":[132,166],"imitation,":[133],"learns":[135,154],"visual":[137],"correspondence":[138,156],"between":[139,157],"human":[143,210,219],"body":[144],"body,":[148],"network":[153,171],"image":[159],"robot.":[165],"correction,":[167],"is":[172,190],"fine-tuned":[173],"gradient,":[177],"uses":[179],"reward":[181],"function":[182],"based":[183],"pretraining":[188],"designed":[192],"language,":[194],"corrected":[198],"achieve":[200],"desired":[201],"tasks.":[202,276],"experiments,":[205],"arm":[208,254],"reproduced":[209],"by":[212,235],"from":[214],"arms,":[222],"were":[224],"moved":[225],"randomly.":[226],"The":[227],"effectiveness":[228],"proposed":[231,250],"was":[233],"demonstrated":[234],"completing":[236],"assigned":[238],"task":[239],"after":[240],"correcting":[241],"imitated":[243],"Experimental":[245],"results":[246],"show":[247],"enabled":[252,271],"behavior":[259],"captured":[261],"outlines":[263],"three":[265],"types":[266],"unlearned":[268],"behaviors":[269],"it":[272],"accomplish":[274],"meaningful":[275]},"counts_by_year":[],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
