{"id":"https://openalex.org/W4413926249","doi":"https://doi.org/10.1109/icra55743.2025.11127332","title":"Feature Extractor or Decision Maker: Rethinking the Role of Visual Encoders in Visuomotor Policies","display_name":"Feature Extractor or Decision Maker: Rethinking the Role of Visual Encoders in Visuomotor Policies","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413926249","doi":"https://doi.org/10.1109/icra55743.2025.11127332"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11127332","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11127332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100730180","display_name":"Ruiyu Wang","orcid":"https://orcid.org/0000-0002-0242-1488"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Ruiyu Wang","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102427121","display_name":"Zheyu Zhuang","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Zheyu Zhuang","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023878608","display_name":"Shutong Jin","orcid":"https://orcid.org/0000-0003-0611-4239"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Shutong Jin","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094255586","display_name":"Nils Ingelhag","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Nils Ingelhag","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023792180","display_name":"Danica Kragi\u0107","orcid":"https://orcid.org/0000-0003-2965-2953"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Danica Kragic","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018027629","display_name":"Florian T. Pokorny","orcid":"https://orcid.org/0000-0003-1114-6040"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Florian T. Pokorny","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of Robotics, Perception and Learning,Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100730180"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2830703,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3654","last_page":"3661"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.6014999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.6014999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.7036240100860596},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6814761161804199},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.649878978729248},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5931665897369385},{"id":"https://openalex.org/keywords/decision-maker","display_name":"Decision maker","score":0.5123742818832397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5077298879623413},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.412310391664505},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36065465211868286},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17702990770339966},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13789698481559753},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.1336836814880371},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10960391163825989}],"concepts":[{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.7036240100860596},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6814761161804199},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.649878978729248},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5931665897369385},{"id":"https://openalex.org/C2986080485","wikidata":"https://www.wikidata.org/wiki/Q1331926","display_name":"Decision maker","level":2,"score":0.5123742818832397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5077298879623413},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.412310391664505},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36065465211868286},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17702990770339966},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13789698481559753},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.1336836814880371},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10960391163825989},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11127332","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11127332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322327","display_name":"Knut och Alice Wallenbergs Stiftelse","ror":"https://ror.org/004hzzk67"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1977655452","https://openalex.org/W2102605133","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2604382266","https://openalex.org/W2952122856","https://openalex.org/W2963703197","https://openalex.org/W3006398608","https://openalex.org/W3205786327","https://openalex.org/W3211462570","https://openalex.org/W4250482878","https://openalex.org/W4293363567","https://openalex.org/W4313156423","https://openalex.org/W4383109144","https://openalex.org/W4385403811","https://openalex.org/W4385431288","https://openalex.org/W4389666747","https://openalex.org/W4402354178"],"related_works":["https://openalex.org/W3082848404","https://openalex.org/W1979583797","https://openalex.org/W2016864125","https://openalex.org/W2372254676","https://openalex.org/W4390516098","https://openalex.org/W2080135837","https://openalex.org/W2793679056","https://openalex.org/W4283332091","https://openalex.org/W2181948922","https://openalex.org/W4413042444"],"abstract_inverted_index":{"An":[0],"end-to-end":[1],"(E2E)":[2],"visuomotor":[3],"policy":[4],"is":[5],"typically":[6],"treated":[7],"as":[8,38,135],"a":[9,121],"unified":[10],"whole,":[11],"but":[12],"recent":[13],"approaches":[14],"using":[15],"out-of-domain":[16],"(OOD)":[17],"data":[18,74],"to":[19,37,50,69,102,129],"pretrain":[20],"the":[21,27,31,34,39,52,77,103],"visual":[22,28,65,116],"encoder":[23,29],"have":[24],"cleanly":[25],"separated":[26],"from":[30,72],"network,":[32],"with":[33],"remainder":[35],"referred":[36],"policy.":[40],"We":[41,110],"propose":[42],"Visual":[43],"Alignment":[44],"Testing,":[45],"an":[46,91],"experimental":[47],"framework":[48],"designed":[49],"evaluate":[51],"validity":[53],"of":[54,95,115],"this":[55,88,112],"functional":[56,79],"separation.":[57,80],"Our":[58],"results":[59],"indicate":[60],"that":[61],"in":[62,97],"E2E-trained":[63],"models,":[64,84],"encoders":[66,86],"actively":[67],"contribute":[68],"decision-making":[70,132],"resulting":[71],"motor":[73],"supervision,":[75],"contradicting":[76],"assumed":[78],"In":[81],"contrast,":[82],"OOD-pretrained":[83],"where":[85],"lack":[87],"capability,":[89],"experience":[90],"average":[92],"performance":[93,105],"drop":[94],"42%":[96],"our":[98],"benchmark":[99],"results,":[100],"compared":[101],"state-of-the-art":[104],"achieved":[106],"by":[107],"E2E":[108],"policies.":[109],"believe":[111],"initial":[113],"exploration":[114],"encoders'":[117],"role":[118],"can":[119],"provide":[120],"first":[122],"step":[123],"towards":[124],"guiding":[125],"future":[126],"pretraining":[127],"methods":[128],"address":[130],"their":[131],"ability,":[133],"such":[134],"developing":[136],"task-conditioned":[137],"or":[138],"context-aware":[139],"encoders.":[140]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
