{"id":"https://openalex.org/W4416748053","doi":"https://doi.org/10.1109/iros60139.2025.11246608","title":"ViT-VS: On the Applicability of Pretrained Vision Transformer Features for Generalizable Visual Servoing","display_name":"ViT-VS: On the Applicability of Pretrained Vision Transformer Features for Generalizable Visual Servoing","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748053","doi":"https://doi.org/10.1109/iros60139.2025.11246608"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114345877","display_name":"Alessandro Scherl","orcid":"https://orcid.org/0009-0008-8061-6353"},"institutions":[{"id":"https://openalex.org/I130194489","display_name":"University of Alicante","ror":"https://ror.org/05t8bcz72","country_code":"ES","type":"education","lineage":["https://openalex.org/I130194489"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Alessandro Scherl","raw_affiliation_strings":["University of Alicante,Department of Computer Technology,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Alicante,Department of Computer Technology,Spain","institution_ids":["https://openalex.org/I130194489"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074166804","display_name":"Stefan Thalhammer","orcid":"https://orcid.org/0000-0002-0008-430X"},"institutions":[{"id":"https://openalex.org/I121760703","display_name":"University of Applied Sciences Technikum Wien","ror":"https://ror.org/04jsx0x49","country_code":"AT","type":"education","lineage":["https://openalex.org/I121760703"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Stefan Thalhammer","raw_affiliation_strings":["UAS Technikum,Industrial Engineering Department,Vienna,Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UAS Technikum,Industrial Engineering Department,Vienna,Austria","institution_ids":["https://openalex.org/I121760703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002985475","display_name":"Bernhard Neuberger","orcid":null},"institutions":[{"id":"https://openalex.org/I121760703","display_name":"University of Applied Sciences Technikum Wien","ror":"https://ror.org/04jsx0x49","country_code":"AT","type":"education","lineage":["https://openalex.org/I121760703"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Bernhard Neuberger","raw_affiliation_strings":["UAS Technikum,Industrial Engineering Department,Vienna,Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UAS Technikum,Industrial Engineering Department,Vienna,Austria","institution_ids":["https://openalex.org/I121760703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035662406","display_name":"Wilfried W\u00f6ber","orcid":"https://orcid.org/0000-0002-0881-205X"},"institutions":[{"id":"https://openalex.org/I121760703","display_name":"University of Applied Sciences Technikum Wien","ror":"https://ror.org/04jsx0x49","country_code":"AT","type":"education","lineage":["https://openalex.org/I121760703"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Wilfried W\u00f6ber","raw_affiliation_strings":["UAS Technikum,Industrial Engineering Department,Vienna,Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UAS Technikum,Industrial Engineering Department,Vienna,Austria","institution_ids":["https://openalex.org/I121760703"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079599826","display_name":"Jos\u00e9 Garc\u00eda\u2010Rodr\u00edguez","orcid":"https://orcid.org/0000-0002-7798-3055"},"institutions":[{"id":"https://openalex.org/I130194489","display_name":"University of Alicante","ror":"https://ror.org/05t8bcz72","country_code":"ES","type":"education","lineage":["https://openalex.org/I130194489"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Garc\u00eda-Rodr\u00edguez","raw_affiliation_strings":["University of Alicante,Department of Computer Technology,Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Alicante,Department of Computer Technology,Spain","institution_ids":["https://openalex.org/I130194489"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40033722,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"17769","last_page":"17776"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.43689998984336853,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.43689998984336853,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.35339999198913574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.032999999821186066,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-servoing","display_name":"Visual servoing","score":0.9315999746322632},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7127000093460083},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5935999751091003},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4542999863624573},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.41850000619888306},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.34779998660087585}],"concepts":[{"id":"https://openalex.org/C10912380","wikidata":"https://www.wikidata.org/wiki/Q527952","display_name":"Visual servoing","level":3,"score":0.9315999746322632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7583000063896179},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7127000093460083},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6865000128746033},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6606000065803528},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5935999751091003},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4542999863624573},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.3269999921321869},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.31690001487731934},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2780527621","wikidata":"https://www.wikidata.org/wiki/Q7936593","display_name":"Visual control","level":2,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1913744585","https://openalex.org/W1972267259","https://openalex.org/W2021788198","https://openalex.org/W2048710758","https://openalex.org/W2062007617","https://openalex.org/W2082991751","https://openalex.org/W2105229870","https://openalex.org/W2111691855","https://openalex.org/W2117228865","https://openalex.org/W2136492961","https://openalex.org/W2151103935","https://openalex.org/W2167501464","https://openalex.org/W2788822937","https://openalex.org/W3045988483","https://openalex.org/W3159481202","https://openalex.org/W3171969922","https://openalex.org/W3203752009","https://openalex.org/W4207020220","https://openalex.org/W4312445439","https://openalex.org/W4312493700","https://openalex.org/W4312521235","https://openalex.org/W4313159712","https://openalex.org/W4376890432","https://openalex.org/W4390874575","https://openalex.org/W4391678142","https://openalex.org/W4402753712"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"servoing":[1,51,91],"enables":[2],"robots":[3],"to":[4,10,72,94],"precisely":[5],"position":[6],"their":[7],"end-effector":[8,122],"relative":[9,96],"a":[11,49,134],"target":[12],"object.":[13],"While":[14],"classical":[15,88],"methods":[16,107],"rely":[17],"on":[18],"hand-crafted":[19],"features":[20],"and":[21,34,86,127,142],"thus":[22],"are":[23,108,145],"universally":[24],"applicable":[25],"without":[26],"task-specific":[27],"training,":[28],"they":[29],"often":[30],"struggle":[31],"with":[32],"occlusions":[33],"environmental":[35],"variations,":[36],"whereas":[37],"learning-based":[38,106],"approaches":[39],"improve":[40],"robustness":[41],"but":[42],"typically":[43],"require":[44],"extensive":[45],"training.":[46,115],"We":[47],"present":[48],"visual":[50,90],"approach":[52,79],"that":[53],"leverages":[54],"pretrained":[55],"vision":[56],"transformers":[57],"for":[58],"semantic":[59],"feature":[60],"extraction,":[61],"combining":[62],"the":[63,75,102,137],"advantages":[64],"of":[65,105,129],"both":[66],"paradigms":[67],"while":[68],"also":[69],"being":[70],"able":[71],"generalize":[73],"beyond":[74],"provided":[76],"sample.":[77],"Our":[78,140],"achieves":[80],"full":[81],"convergence":[82,103],"in":[83,98,121],"unperturbed":[84],"scenarios":[85],"surpasses":[87],"image-based":[89],"by":[92],"up":[93],"31.2%":[95],"improvement":[97],"perturbed":[99],"scenarios.":[100],"Even":[101],"rates":[104],"matched":[109],"despite":[110],"requiring":[111],"no":[112],"task-or":[113],"object-specific":[114],"Real-world":[116],"evaluations":[117],"confirm":[118],"robust":[119],"performance":[120],"positioning,":[123],"industrial":[124],"box":[125],"manipulation,":[126],"grasping":[128],"unseen":[130],"objects":[131],"using":[132],"only":[133],"reference":[135],"from":[136],"same":[138],"category.":[139],"code":[141],"simulation":[143],"environment":[144],"available":[146],"at:":[147],"https://alessandroscherl.github.io/ViT-VS/":[148]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-28T00:00:00"}
