{"id":"https://openalex.org/W4401415982","doi":"https://doi.org/10.1109/icra57147.2024.10610695","title":"Composing Pre-Trained Object-Centric Representations for Robotics From \"What\" and \"Where\" Foundation Models","display_name":"Composing Pre-Trained Object-Centric Representations for Robotics From \"What\" and \"Where\" Foundation Models","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401415982","doi":"https://doi.org/10.1109/icra57147.2024.10610695"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610695","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101211964","display_name":"Junyao Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Junyao Shi","raw_affiliation_strings":["University of Pennsylvania,Computer and Information Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,Computer and Information Science","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059057417","display_name":"Jianing Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianing Qian","raw_affiliation_strings":["University of Pennsylvania,Computer and Information Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,Computer and Information Science","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063425598","display_name":"Yecheng Jason Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yecheng Jason Ma","raw_affiliation_strings":["University of Pennsylvania,Computer and Information Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,Computer and Information Science","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079302923","display_name":"Dinesh Jayaraman","orcid":"https://orcid.org/0000-0002-6888-3095"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dinesh Jayaraman","raw_affiliation_strings":["University of Pennsylvania,Computer and Information Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pennsylvania,Computer and Information Science","institution_ids":["https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101211964"],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":1.3547,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87338243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"15424","last_page":"15432"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.7991889715194702},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.7378928661346436},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7363704442977905},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.63970547914505},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5749236941337585},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3647516965866089},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32218414545059204},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3148186206817627},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.05879199504852295}],"concepts":[{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.7991889715194702},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.7378928661346436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7363704442977905},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.63970547914505},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5749236941337585},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3647516965866089},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32218414545059204},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3148186206817627},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.05879199504852295},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610695","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":109,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1536680647","https://openalex.org/W1583837637","https://openalex.org/W1968001820","https://openalex.org/W2082627290","https://openalex.org/W2108598243","https://openalex.org/W2151103935","https://openalex.org/W2159706693","https://openalex.org/W2161969291","https://openalex.org/W2194775991","https://openalex.org/W2222512263","https://openalex.org/W2607037079","https://openalex.org/W2806070179","https://openalex.org/W2896457183","https://openalex.org/W2948293419","https://openalex.org/W2955368974","https://openalex.org/W2963305465","https://openalex.org/W2963689319","https://openalex.org/W2964015378","https://openalex.org/W2967895468","https://openalex.org/W2984756091","https://openalex.org/W2991222810","https://openalex.org/W2999417551","https://openalex.org/W3007769740","https://openalex.org/W3037784242","https://openalex.org/W3040956815","https://openalex.org/W3098499184","https://openalex.org/W3184773395","https://openalex.org/W3187722890","https://openalex.org/W3205786327","https://openalex.org/W3207057769","https://openalex.org/W4221159977","https://openalex.org/W4225670345","https://openalex.org/W4225947020","https://openalex.org/W4226167593","https://openalex.org/W4226246634","https://openalex.org/W4235169531","https://openalex.org/W4244030505","https://openalex.org/W4283379570","https://openalex.org/W4285704217","https://openalex.org/W4287554891","https://openalex.org/W4288094291","https://openalex.org/W4288322145","https://openalex.org/W4288337580","https://openalex.org/W4292779060","https://openalex.org/W4295246696","https://openalex.org/W4297733535","https://openalex.org/W4302010007","https://openalex.org/W4303440113","https://openalex.org/W4303648971","https://openalex.org/W4307078757","https://openalex.org/W4313156423","https://openalex.org/W4313857118","https://openalex.org/W4320342559","https://openalex.org/W4321319299","https://openalex.org/W4324260265","https://openalex.org/W4365606129","https://openalex.org/W4379260839","https://openalex.org/W4383097668","https://openalex.org/W4383473925","https://openalex.org/W4385245566","https://openalex.org/W4385430559","https://openalex.org/W4385430582","https://openalex.org/W4386071707","https://openalex.org/W4390874575","https://openalex.org/W4401414212","https://openalex.org/W4401415650","https://openalex.org/W6689029123","https://openalex.org/W6701573534","https://openalex.org/W6726873649","https://openalex.org/W6744580074","https://openalex.org/W6754677871","https://openalex.org/W6755207826","https://openalex.org/W6758420182","https://openalex.org/W6764529735","https://openalex.org/W6765399202","https://openalex.org/W6765456200","https://openalex.org/W6769281838","https://openalex.org/W6770507128","https://openalex.org/W6772619266","https://openalex.org/W6778883912","https://openalex.org/W6779809370","https://openalex.org/W6779889584","https://openalex.org/W6787728148","https://openalex.org/W6791353385","https://openalex.org/W6793805516","https://openalex.org/W6797962799","https://openalex.org/W6799150178","https://openalex.org/W6800774826","https://openalex.org/W6804342959","https://openalex.org/W6809873821","https://openalex.org/W6810080435","https://openalex.org/W6810084619","https://openalex.org/W6810655313","https://openalex.org/W6811470611","https://openalex.org/W6838586813","https://openalex.org/W6841115740","https://openalex.org/W6845226490","https://openalex.org/W6845737239","https://openalex.org/W6845793730","https://openalex.org/W6845848261","https://openalex.org/W6846242362","https://openalex.org/W6849988508","https://openalex.org/W6850734919","https://openalex.org/W6851416138","https://openalex.org/W6851607685","https://openalex.org/W6853053110","https://openalex.org/W6854200302","https://openalex.org/W6857685038"],"related_works":["https://openalex.org/W2381393187","https://openalex.org/W2332779545","https://openalex.org/W2358060160","https://openalex.org/W2035483685","https://openalex.org/W1969764885","https://openalex.org/W596947562","https://openalex.org/W2793937822","https://openalex.org/W2790817834","https://openalex.org/W2220552745","https://openalex.org/W1508899372"],"abstract_inverted_index":{"There":[0],"have":[1],"recently":[2],"been":[3],"large":[4],"advances":[5],"both":[6],"in":[7,19,49,68],"pre-training":[8],"visual":[9],"representations":[10,39,48,103,151,159],"for":[11,25,35,40,89,104,133,152],"robotic":[12,41,90,126,134],"control":[13,91,105],"and":[14,51,124,142],"segmenting":[15],"unknown":[16],"category":[17],"objects":[18],"general":[20],"images.":[21],"To":[22,74],"leverage":[23],"these":[24],"improved":[26],"robot":[27],"learning,":[28],"we":[29,54,79,128],"propose":[30],"POCR,":[31],"a":[32,58],"new":[33,119],"framework":[34],"building":[36],"pre-trained":[37,59,82,101,115,150],"object-centric":[38,102,158],"control.":[42],"Building":[43],"on":[44,137],"theories":[45],"of":[46,113,147],"\"what-where\"":[47],"psychology":[50],"computer":[52],"vision,":[53],"use":[55],"segmentations":[56],"from":[57,164],"model":[60],"to":[61],"stably":[62],"locate":[63],"across":[64],"timesteps,":[65],"various":[66,122],"entities":[67],"the":[69,96,111,148],"scene,":[70],"capturing":[71,94],"\"where\"":[72],"information.":[73],"each":[75],"such":[76],"segmented":[77],"entity,":[78],"apply":[80],"other":[81],"models":[83],"that":[84,130,160],"build":[85],"vector":[86],"descriptions":[87],"suitable":[88],"tasks,":[92,127],"thus":[93],"\"what\"":[95],"entity":[97],"is.":[98],"Thus,":[99],"our":[100],"are":[106,161],"constructed":[107],"by":[108],"appropriately":[109],"combining":[110],"outputs":[112],"off-the-shelf":[114],"models,":[116],"with":[117],"no":[118],"training.":[120],"On":[121],"simulated":[123],"real":[125],"show":[129],"imitation":[131],"policies":[132],"manipulators":[135],"trained":[136,163],"POCR":[138],"achieve":[139],"better":[140],"performance":[141],"systematic":[143],"generalization":[144],"than":[145],"state":[146],"art":[149],"robotics,":[153],"as":[154,156],"well":[155],"prior":[157],"typically":[162],"scratch.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2025-10-10T00:00:00"}
