{"id":"https://openalex.org/W4389666747","doi":"https://doi.org/10.1109/iros55552.2023.10342201","title":"Exploring Visual Pre-training for Robot Manipulation: Datasets, Models and Methods","display_name":"Exploring Visual Pre-training for Robot Manipulation: Datasets, Models and Methods","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389666747","doi":"https://doi.org/10.1109/iros55552.2023.10342201"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101664544","display_name":"Ya Jing","orcid":"https://orcid.org/0000-0002-4179-8210"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ya Jing","raw_affiliation_strings":["ByteDance Research"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104000466","display_name":"Xuelin Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuelin Zhu","raw_affiliation_strings":["ByteDance Research","Southeast University"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]},{"raw_affiliation_string":"Southeast University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035330301","display_name":"Xingbin Liu","orcid":"https://orcid.org/0000-0001-6899-5102"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xingbin Liu","raw_affiliation_strings":["ByteDance Research"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014093222","display_name":"Qie Sima","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qie Sima","raw_affiliation_strings":["ByteDance Research","Tsinghua University"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]},{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101278786","display_name":"Taozheng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taozheng Yang","raw_affiliation_strings":["ByteDance Research"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100934095","display_name":"Yunhai Feng","orcid":"https://orcid.org/0009-0006-3041-9816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunhai Feng","raw_affiliation_strings":["ByteDance Research"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065667064","display_name":"Tao Kong","orcid":"https://orcid.org/0000-0002-9412-1457"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Kong","raw_affiliation_strings":["ByteDance Research"],"affiliations":[{"raw_affiliation_string":"ByteDance Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101664544"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7379,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.73780205,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"11390","last_page":"11395"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7811440229415894},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7482509613037109},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7135705947875977},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.514337420463562},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5120583176612854},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.4657118618488312},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4445949196815491},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36634641885757446},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3452637195587158},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.22956642508506775}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7811440229415894},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7482509613037109},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7135705947875977},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.514337420463562},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5120583176612854},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.4657118618488312},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4445949196815491},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36634641885757446},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3452637195587158},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.22956642508506775},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10342201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342201","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1892339738","https://openalex.org/W1895577753","https://openalex.org/W1903029394","https://openalex.org/W2108598243","https://openalex.org/W2155007355","https://openalex.org/W2194775991","https://openalex.org/W2962787969","https://openalex.org/W2963326767","https://openalex.org/W2963411833","https://openalex.org/W2998012869","https://openalex.org/W3007769740","https://openalex.org/W3035524453","https://openalex.org/W3090449556","https://openalex.org/W3094502228","https://openalex.org/W3145450063","https://openalex.org/W3159619744","https://openalex.org/W3174510143","https://openalex.org/W3184735396","https://openalex.org/W3205786327","https://openalex.org/W4221167396","https://openalex.org/W4226167593","https://openalex.org/W4283211057","https://openalex.org/W4283449034","https://openalex.org/W4285218426","https://openalex.org/W4312643908","https://openalex.org/W4313156423","https://openalex.org/W6682849425","https://openalex.org/W6751661917","https://openalex.org/W6769035977","https://openalex.org/W6769596995","https://openalex.org/W6774314701","https://openalex.org/W6784333009","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6810080435","https://openalex.org/W6810265253","https://openalex.org/W6810655313","https://openalex.org/W6838638105","https://openalex.org/W6838959477","https://openalex.org/W6845226490"],"related_works":["https://openalex.org/W2068608913","https://openalex.org/W3124914020","https://openalex.org/W2141033859","https://openalex.org/W2077542787","https://openalex.org/W2156434174","https://openalex.org/W2071701083","https://openalex.org/W2383687187","https://openalex.org/W2070401501","https://openalex.org/W2121496884","https://openalex.org/W2913184176"],"abstract_inverted_index":{"Visual":[0],"pre-training":[1,27,47,57,81],"with":[2,19],"large-scale":[3,106],"real-world":[4],"data":[5],"has":[6],"made":[7],"great":[8,14],"progress":[9],"in":[10,16,124],"recent":[11],"years,":[12],"showing":[13],"potential":[15],"robot":[17,29,50,74,84,122,131],"learning":[18,91,100,113],"pixel":[20],"observations.":[21],"However,":[22],"the":[23,43,96,110,129,133,136],"recipes":[24],"of":[25,45,135],"visual":[26,46,80,114],"for":[28,73,83],"manipulation":[30,51,85],"tasks":[31,52],"are":[32,68,71],"yet":[33],"to":[34,101],"be":[35,144],"built.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,77],"thoroughly":[41],"investigate":[42],"effects":[44],"strategies":[48],"on":[49,121,146],"from":[53,105],"three":[54],"fundamental":[55],"perspectives:":[56],"datasets,":[58],"model":[59],"architectures":[60],"and":[61,92,116,128,140],"training":[62],"methods.":[63],"Several":[64],"significant":[65],"experimental":[66],"findings":[67],"provided":[69],"that":[70],"beneficial":[72],"learning.":[75,94],"Further,":[76],"propose":[78],"a":[79],"scheme":[82],"termed":[86],"Vi-PRoM,":[87],"which":[88],"combines":[89],"self-supervised":[90],"supervised":[93],"Concretely,":[95],"former":[97],"employs":[98],"contrastive":[99],"acquire":[102],"underlying":[103],"patterns":[104],"unlabeled":[107],"data,":[108],"while":[109],"latter":[111],"aims":[112],"semantics":[115],"temporal":[117],"dynamics.":[118],"Extensive":[119],"experiments":[120],"manipulations":[123],"various":[125],"simulation":[126],"environments":[127],"real":[130],"demonstrate":[132],"superiority":[134],"proposed":[137],"scheme.":[138],"Videos":[139],"more":[141],"details":[142],"can":[143],"found":[145],"https://explore-pretrain-robot.github.io.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
