{"id":"https://openalex.org/W4416252193","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228732","title":"ROVER: Autonomous Open-Vocabulary Object Searching in Unexplored Environments Using VLM-Driven Scene Understanding","display_name":"ROVER: Autonomous Open-Vocabulary Object Searching in Unexplored Environments Using VLM-Driven Scene Understanding","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252193","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228732"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087072961","display_name":"Abdul Basit","orcid":"https://orcid.org/0000-0002-0092-6853"},"institutions":[{"id":"https://openalex.org/I120250893","display_name":"New York University Abu Dhabi","ror":"https://ror.org/00e5k0821","country_code":"AE","type":"education","lineage":["https://openalex.org/I120250893","https://openalex.org/I57206974"]}],"countries":["AE"],"is_corresponding":true,"raw_author_name":"Abdul Basit","raw_affiliation_strings":["New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE"],"affiliations":[{"raw_affiliation_string":"New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE","institution_ids":["https://openalex.org/I120250893"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114413769","display_name":"Niraj Pudasaini","orcid":null},"institutions":[{"id":"https://openalex.org/I120250893","display_name":"New York University Abu Dhabi","ror":"https://ror.org/00e5k0821","country_code":"AE","type":"education","lineage":["https://openalex.org/I120250893","https://openalex.org/I57206974"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Niraj Pudasaini","raw_affiliation_strings":["New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE"],"affiliations":[{"raw_affiliation_string":"New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE","institution_ids":["https://openalex.org/I120250893"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005190949","display_name":"Muhammad Shafique","orcid":"https://orcid.org/0000-0002-2607-8135"},"institutions":[{"id":"https://openalex.org/I120250893","display_name":"New York University Abu Dhabi","ror":"https://ror.org/00e5k0821","country_code":"AE","type":"education","lineage":["https://openalex.org/I120250893","https://openalex.org/I57206974"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Muhammad Shafique","raw_affiliation_strings":["New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE"],"affiliations":[{"raw_affiliation_string":"New York University (NYU) Abu Dhabi,eBRAIN Lab, Division of Engineering,Abu Dhabi,UAE","institution_ids":["https://openalex.org/I120250893"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5087072961"],"corresponding_institution_ids":["https://openalex.org/I120250893"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37581121,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3382999897003174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.22120000422000885,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.19210000336170197,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.7325000166893005},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.6471999883651733},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6129000186920166},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.5037999749183655},{"id":"https://openalex.org/keywords/search-and-rescue","display_name":"Search and rescue","score":0.4702000021934509},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.44690001010894775},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.4207000136375427},{"id":"https://openalex.org/keywords/simultaneous-localization-and-mapping","display_name":"Simultaneous localization and mapping","score":0.40880000591278076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.77920001745224},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.7325000166893005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6712999939918518},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.6471999883651733},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6129000186920166},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5830000042915344},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.5037999749183655},{"id":"https://openalex.org/C2775935494","wikidata":"https://www.wikidata.org/wiki/Q741964","display_name":"Search and rescue","level":3,"score":0.4702000021934509},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.4207000136375427},{"id":"https://openalex.org/C86369673","wikidata":"https://www.wikidata.org/wiki/Q1203659","display_name":"Simultaneous localization and mapping","level":4,"score":0.40880000591278076},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.40709999203681946},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.373199999332428},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C2776548393","wikidata":"https://www.wikidata.org/wiki/Q2031473","display_name":"Unmanned ground vehicle","level":2,"score":0.32440000772476196},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.31529998779296875},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.2896000146865845},{"id":"https://openalex.org/C2776228582","wikidata":"https://www.wikidata.org/wiki/Q7455797","display_name":"Service robot","level":3,"score":0.2865999937057495},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C9628104","wikidata":"https://www.wikidata.org/wiki/Q788009","display_name":"Autonomous system (mathematics)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.272599995136261},{"id":"https://openalex.org/C2778835581","wikidata":"https://www.wikidata.org/wiki/Q2916098","display_name":"Autonomous robot","level":4,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228732","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228732","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2193145675","https://openalex.org/W2336416123","https://openalex.org/W2898231767","https://openalex.org/W4388320584","https://openalex.org/W4402716047"],"related_works":[],"abstract_inverted_index":{"Autonomous":[0],"robots":[1,130,205],"operating":[2],"in":[3,73,92,170,188,214],"unstructured":[4],"and":[5,23,66,85,88,107,131,173,216],"unknown":[6],"environments":[7],"require":[8],"efficient":[9],"object":[10,27,41,47,60,90,163,212],"searching":[11,61],"capabilities":[12],"to":[13,33,110,195],"enable":[14],"real-world":[15,74,154],"applications":[16],"such":[17],"as":[18],"service":[19],"robotics,":[20],"warehouse":[21],"automation,":[22],"disaster":[24],"response.":[25],"Traditional":[26],"detection":[28,91,164,168],"models":[29],"are":[30],"inherently":[31],"limited":[32,194],"predefined":[34],"categories,":[35],"making":[36],"them":[37],"unsuitable":[38],"for":[39,82,133,161],"open-ended":[40],"search":[42],"tasks":[43],"where":[44],"the":[45,69,111,143],"target":[46],"is":[48,124],"specified":[49,102],"dynamically.":[50],"In":[51],"this":[52],"work,":[53],"we":[54,64],"propose":[55],"ROVER,":[56],"an":[57,174],"end-to-end":[58],"open-vocabulary":[59,207],"framework":[62,198],"which":[63],"implement":[65],"test":[67],"on":[68,127,136,142,203],"AgileX":[70],"LIMO":[71],"UGV":[72],"environments.":[75,94],"ROVER":[76,157],"integrates":[77],"frontier-based":[78],"exploration,":[79],"RTABMap":[80],"SLAM":[81],"real-time":[83,134],"localization":[84],"dynamic":[86,215],"navigation,":[87],"language-driven":[89,211],"unexplored":[93,217],"The":[95],"system":[96],"autonomously":[97],"explores,":[98],"dynamically":[99],"detects":[100],"objects":[101],"via":[103],"natural":[104],"language":[105],"input,":[106],"navigates":[108],"precisely":[109],"detected":[112],"object\u2019s":[113],"location":[114],"using":[115,185],"depth-based":[116],"coordinate":[117],"estimation.":[118],"Unlike":[119,191],"prior":[120,192],"works,":[121],"our":[122,197],"approach":[123],"fully":[125],"deployable":[126],"resource-constrained":[128],"mobile":[129],"optimized":[132],"execution":[135],"embedded":[137],"hardware.":[138],"Our":[139],"ported":[140],"model":[141],"Orin":[144],"Nano":[145],"edge":[146],"platform":[147],"runs":[148],"at":[149],"around":[150],"a":[151],"Hz.":[152],"Extensive":[153],"experiments":[155],"demonstrate":[156],"achieves":[158],"100%":[159],"precision/recall":[160],"small":[162],"(screws/kettles),":[165],"91.3%":[166],"chair":[167],"precision":[169],"cluttered":[171],"environments,":[172],"average":[175],"95.5%":[176],"F1-score,":[177],"outperforming":[178],"YOLO-World":[179],"baselines":[180],"by":[181],"13.1":[182],"percentage":[183],"points":[184],"Grounding":[186],"DINO":[187],"real-demonstration":[189],"settings.":[190,218],"works":[193],"simulations,":[196],"demonstrates":[199],"full":[200],"operational":[201],"capability":[202],"physical":[204],"with":[206],"robotic":[208],"perception,":[209],"proving":[210],"navigation":[213]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
