{"id":"https://openalex.org/W4407403937","doi":"https://doi.org/10.1109/sii59315.2025.10870971","title":"Scalable, Training-Free Visual Language Robotics: a modular multi-model framework for consumer-grade GPUs","display_name":"Scalable, Training-Free Visual Language Robotics: a modular multi-model framework for consumer-grade GPUs","publication_year":2025,"publication_date":"2025-01-21","ids":{"openalex":"https://openalex.org/W4407403937","doi":"https://doi.org/10.1109/sii59315.2025.10870971"},"language":"en","primary_location":{"id":"doi:10.1109/sii59315.2025.10870971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sii59315.2025.10870971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SICE International Symposium on System Integration (SII)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Marie Samson","orcid":null},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Marie Samson","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065027104","display_name":"Bastien Muraccioli","orcid":null},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Bastien Muraccioli","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091136016","display_name":"Fumio Kanehiro","orcid":"https://orcid.org/0000-0002-0277-3467"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Fumio Kanehiro","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),CNRS-AIST JRL (Joint Robotics Laboratory),Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":1.7646,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.82832076,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"193","last_page":"198"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9617999792098999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9617999792098999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9293000102043152,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.9290000200271606,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8127411603927612},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8047323226928711},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.7646123170852661},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5946142673492432},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.501845121383667},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4659247100353241},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34287703037261963},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.31728267669677734},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.21232467889785767}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8127411603927612},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8047323226928711},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.7646123170852661},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5946142673492432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.501845121383667},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4659247100353241},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34287703037261963},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.31728267669677734},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21232467889785767},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sii59315.2025.10870971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sii59315.2025.10870971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/SICE International Symposium on System Integration (SII)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2963037989","https://openalex.org/W4312420092","https://openalex.org/W4390874575","https://openalex.org/W6847252870","https://openalex.org/W6850503672","https://openalex.org/W6853330684","https://openalex.org/W6853664340","https://openalex.org/W6854738657","https://openalex.org/W6857733129","https://openalex.org/W6857785731","https://openalex.org/W6859532197","https://openalex.org/W6862960294","https://openalex.org/W6865118152","https://openalex.org/W6869853401"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W2389214306","https://openalex.org/W2378076731","https://openalex.org/W3216976533","https://openalex.org/W2019887508","https://openalex.org/W4401278057"],"abstract_inverted_index":{"The":[0,146],"integration":[1,151],"of":[2,77,130,141,152,191,223],"language":[3,106,136],"instructions":[4],"with":[5,227],"robotic":[6,71,154],"control,":[7],"particularly":[8],"through":[9],"Vision":[10],"Language":[11,51,94],"Action":[12],"(VLA)":[13],"models,":[14],"has":[15],"shown":[16],"significant":[17],"potential.":[18],"However,":[19],"these":[20,212],"systems":[21],"are":[22,215,231],"often":[23],"hindered":[24],"by":[25,158],"high":[26],"computational":[27],"costs,":[28],"the":[29,62,83,167,180],"need":[30,63,168],"for":[31,41,64,70,123,149,169],"extensive":[32],"retraining,":[33,65],"and":[34,97,105,126,156,163,186,193,225,238],"limited":[35],"scalability,":[36],"making":[37],"them":[38,120],"less":[39],"accessible":[40],"widespread":[42],"use.In":[43],"this":[44],"paper,":[45],"we":[46],"introduce":[47],"SVLR":[48,73,142,175],"(Scalable":[49],"Visual":[50],"Robotics)<sup":[52],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[53],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>,":[54],"an":[55,116,198],"open-source,":[56],"modular":[57],"framework":[58,147],"that":[59,174],"operates":[60,195],"without":[61,166],"providing":[66],"a":[67,75,128,188,220],"scalable":[68],"solution":[69],"control.":[72],"leverages":[74],"combination":[76],"lightweight,":[78],"open-source":[79],"AI":[80,184],"models":[81,109,230],"including":[82],"Vision-Language":[84],"Model":[85,95],"(VLM)":[86],"Mini-InternVL,":[87],"zero-shot":[88],"image":[89],"segmentation":[90],"model":[91,100],"CLIPSeg,":[92],"Large":[93],"Phi-3,":[96],"sentence":[98],"similarity":[99],"all-MiniLM":[101],"to":[102,112,134,179,233],"process":[103],"visual":[104],"inputs.":[107],"These":[108],"work":[110],"together":[111],"identify":[113],"objects":[114],"in":[115,132,183,207,240],"unknown":[117],"environment,":[118],"use":[119],"as":[121],"parameters":[122],"task":[124,164],"execution,":[125],"generate":[127],"sequence":[129],"actions":[131],"response":[133],"natural":[135],"instructions.":[137],"A":[138],"key":[139],"strength":[140],"is":[143],"its":[144],"scalability.":[145],"allows":[148],"easy":[150],"new":[153],"tasks":[155,224],"robots":[157,192],"simply":[159],"adding":[160],"text":[161],"descriptions":[162],"definitions,":[165],"retraining.":[170],"This":[171],"modularity":[172],"ensures":[173],"can":[176],"continuously":[177],"adapt":[178],"latest":[181],"advancements":[182],"technologies":[185],"support":[187],"wide":[189],"range":[190],"tasks.SVLR":[194],"effectively":[196],"on":[197],"NVIDIA":[199],"RTX":[200],"2070":[201],"(mobile)":[202],"GPU,":[203],"demonstrating":[204],"promising":[205],"performance":[206,239],"executing":[208],"pick-and-place":[209],"tasks.":[210],"While":[211],"initial":[213],"results":[214],"encouraging,":[216],"further":[217],"evaluation":[218],"across":[219],"broader":[221],"set":[222],"comparisons":[226],"existing":[228],"VLA":[229],"needed":[232],"assess":[234],"SVLR\u2019s":[235],"generalization":[236],"capabilities":[237],"more":[241],"complex":[242],"scenarios.":[243]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
