{"id":"https://openalex.org/W4401415650","doi":"https://doi.org/10.1109/icra57147.2024.10610131","title":"Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies","display_name":"Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415650","doi":"https://doi.org/10.1109/icra57147.2024.10610131"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610131","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059057417","display_name":"Jianing Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jianing Qian","raw_affiliation_strings":["University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA"],"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076074875","display_name":"Anastasios Panagopoulos","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anastasios Panagopoulos","raw_affiliation_strings":["University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA"],"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079302923","display_name":"Dinesh Jayaraman","orcid":"https://orcid.org/0000-0002-6888-3095"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dinesh Jayaraman","raw_affiliation_strings":["University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA"],"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,GRASP Lab,Computer and Information Science Department,USA","institution_ids":["https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059057417"],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":1.0526,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.77684911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"17544","last_page":"17552"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9289000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9289000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.744837760925293},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7137348651885986},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6944971084594727},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6325619220733643},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5881775617599487},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.48374366760253906},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.36976611614227295},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1619713306427002},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.13177627325057983},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.0761185884475708}],"concepts":[{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.744837760925293},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7137348651885986},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6944971084594727},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6325619220733643},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5881775617599487},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.48374366760253906},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.36976611614227295},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1619713306427002},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.13177627325057983},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0761185884475708},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610131","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":103,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2012210378","https://openalex.org/W2102605133","https://openalex.org/W2110158442","https://openalex.org/W2132914434","https://openalex.org/W2161236525","https://openalex.org/W2194775991","https://openalex.org/W2799140957","https://openalex.org/W2948293419","https://openalex.org/W2952793010","https://openalex.org/W2962793652","https://openalex.org/W2963542991","https://openalex.org/W2964242760","https://openalex.org/W2964846271","https://openalex.org/W2981344907","https://openalex.org/W3006398608","https://openalex.org/W3007769740","https://openalex.org/W3009561768","https://openalex.org/W3035060554","https://openalex.org/W3035422918","https://openalex.org/W3040956815","https://openalex.org/W3094502228","https://openalex.org/W3159481202","https://openalex.org/W3170874841","https://openalex.org/W3171007011","https://openalex.org/W3176196997","https://openalex.org/W3204171527","https://openalex.org/W3205786327","https://openalex.org/W3213836217","https://openalex.org/W4221159977","https://openalex.org/W4221161778","https://openalex.org/W4226330622","https://openalex.org/W4235169531","https://openalex.org/W4244030505","https://openalex.org/W4247609441","https://openalex.org/W4249009392","https://openalex.org/W4280647773","https://openalex.org/W4282941252","https://openalex.org/W4285704217","https://openalex.org/W4286696412","https://openalex.org/W4287554891","https://openalex.org/W4288103164","https://openalex.org/W4288337580","https://openalex.org/W4302010007","https://openalex.org/W4303440113","https://openalex.org/W4303648971","https://openalex.org/W4312443924","https://openalex.org/W4312453657","https://openalex.org/W4312505596","https://openalex.org/W4312665716","https://openalex.org/W4312755804","https://openalex.org/W4313156423","https://openalex.org/W4313857118","https://openalex.org/W4319299748","https://openalex.org/W4320342559","https://openalex.org/W4320558669","https://openalex.org/W4321319299","https://openalex.org/W4323066621","https://openalex.org/W4364387022","https://openalex.org/W4366208220","https://openalex.org/W4377971222","https://openalex.org/W4379260839","https://openalex.org/W4383473925","https://openalex.org/W4386113253","https://openalex.org/W4390874575","https://openalex.org/W4402354166","https://openalex.org/W6629368666","https://openalex.org/W6689029123","https://openalex.org/W6766312635","https://openalex.org/W6767868144","https://openalex.org/W6769596995","https://openalex.org/W6774314701","https://openalex.org/W6774670964","https://openalex.org/W6779326418","https://openalex.org/W6779889584","https://openalex.org/W6784333009","https://openalex.org/W6787728148","https://openalex.org/W6788135285","https://openalex.org/W6793805516","https://openalex.org/W6797962799","https://openalex.org/W6800564222","https://openalex.org/W6804160461","https://openalex.org/W6809788415","https://openalex.org/W6810080435","https://openalex.org/W6810084619","https://openalex.org/W6811470611","https://openalex.org/W6839632867","https://openalex.org/W6845226490","https://openalex.org/W6845793730","https://openalex.org/W6845848261","https://openalex.org/W6846242362","https://openalex.org/W6849988508","https://openalex.org/W6850056956","https://openalex.org/W6850300045","https://openalex.org/W6851416138","https://openalex.org/W6851578965","https://openalex.org/W6851724587","https://openalex.org/W6851800889","https://openalex.org/W6853053110","https://openalex.org/W6854200302","https://openalex.org/W6857685038","https://openalex.org/W6861939816","https://openalex.org/W6864693536"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2379948177","https://openalex.org/W2334580170"],"abstract_inverted_index":{"Generic":[0],"re-usable":[1],"pre-trained":[2,64,112],"image":[3,39],"representation":[4],"encoders":[5,40],"have":[6],"become":[7],"a":[8,31,61],"standard":[9,108,128],"component":[10],"of":[11,34,82,110],"methods":[12],"for":[13,21,131],"many":[14],"computer":[15],"vision":[16,65,113],"tasks.":[17],"As":[18],"visual":[19],"representations":[20,80,130],"robots":[22],"however,":[23],"their":[24,49],"utility":[25],"has":[26],"been":[27],"limited,":[28],"leading":[29],"to":[30,36,45],"recent":[32],"wave":[33],"efforts":[35],"pre-train":[37],"robotics-specific":[38],"that":[41,69,121],"are":[42],"better":[43],"suited":[44],"robotic":[46],"tasks":[47,133],"than":[48,78],"generic":[50,111],"counterparts.":[51],"We":[52],"propose":[53],"Scene":[54],"Objects":[55],"From":[56],"Transformers,":[57],"abbreviated":[58],"as":[59],"SOFT(\u2022),":[60],"wrapper":[62],"around":[63],"transformer":[66],"(PVT)":[67],"models":[68],"bridges":[70],"this":[71],"gap":[72],"without":[73],"any":[74],"further":[75],"training.":[76],"Rather":[77],"construct":[79],"out":[81],"only":[83],"the":[84,140],"final":[85],"layer":[86],"activations,":[87,102],"SOFT(\u2022)":[88],"individuates":[89],"and":[90,97,136,146],"locates":[91],"object-like":[92],"entities":[93],"from":[94],"PVT":[95,101,129],"attentions,":[96],"describes":[98],"them":[99],"with":[100],"producing":[103],"an":[104],"object-centric":[105],"embedding.":[106],"Across":[107],"choices":[109],"transformers":[114],"PVT,":[115],"we":[116],"demonstrate":[117],"in":[118,134],"each":[119],"case":[120],"policies":[122],"trained":[123],"on":[124],"SOFT(PVT)":[125],"far":[126],"outstrip":[127],"manipulation":[132],"simulated":[135],"real":[137],"settings,":[138],"approaching":[139],"state-of-the-art":[141],"robotics-aware":[142],"representations.":[143],"Code,":[144],"appendix":[145],"videos:":[147],"https://sites.google.com/view/robot-soft/":[148]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
