{"id":"https://openalex.org/W4410140555","doi":"https://doi.org/10.1145/3716550.3722033","title":"RLS3: RL-Based Synthetic Sample Selection to Enhance Spatial Reasoning in Vision-Language Models for Indoor Autonomous Perception","display_name":"RLS3: RL-Based Synthetic Sample Selection to Enhance Spatial Reasoning in Vision-Language Models for Indoor Autonomous Perception","publication_year":2025,"publication_date":"2025-05-06","ids":{"openalex":"https://openalex.org/W4410140555","doi":"https://doi.org/10.1145/3716550.3722033"},"language":"en","primary_location":{"id":"doi:10.1145/3716550.3722033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716550.3722033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3716550.3722033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE 16th International Conference on Cyber-Physical Systems (with CPS-IoT Week 2025)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3716550.3722033","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009697582","display_name":"Joshua R. Waite","orcid":"https://orcid.org/0000-0003-4187-6167"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Joshua R. Waite","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4187-6167","affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065627917","display_name":"Md Zahid Hasan","orcid":"https://orcid.org/0009-0000-3213-2719"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Zahid Hasan","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0009-0000-3213-2719","affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010702633","display_name":"Qisai Liu","orcid":"https://orcid.org/0000-0002-9553-0789"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qisai Liu","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0000-0002-9553-0789","affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031212270","display_name":"Zhanhong Jiang","orcid":"https://orcid.org/0000-0001-5363-7898"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhanhong Jiang","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0000-0001-5363-7898","affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066142047","display_name":"Chinmay Hegde","orcid":"https://orcid.org/0000-0003-4574-8066"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chinmay Hegde","raw_affiliation_strings":["New York University, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0003-4574-8066","affiliations":[{"raw_affiliation_string":"New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081037761","display_name":"Soumik Sarkar","orcid":"https://orcid.org/0000-0002-6775-9199"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soumik Sarkar","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"raw_orcid":"https://orcid.org/0000-0002-6775-9199","affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5009697582"],"corresponding_institution_ids":["https://openalex.org/I173911158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0736182,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.7343460321426392},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.719836950302124},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6955723166465759},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6823521852493286},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6139056086540222},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.5718896389007568},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4678899347782135},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10884314775466919}],"concepts":[{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.7343460321426392},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.719836950302124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6955723166465759},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6823521852493286},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6139056086540222},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.5718896389007568},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4678899347782135},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10884314775466919},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3716550.3722033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716550.3722033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3716550.3722033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE 16th International Conference on Cyber-Physical Systems (with CPS-IoT Week 2025)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3716550.3722033","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3716550.3722033","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3716550.3722033","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE 16th International Conference on Cyber-Physical Systems (with CPS-IoT Week 2025)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2023424935","display_name":null,"funder_award_id":"CNS-1845969","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3795007027","display_name":null,"funder_award_id":"CPS Frontier CNS-1954556","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G5824456953","display_name":null,"funder_award_id":"CAREER CNS-1845969","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7305207087","display_name":null,"funder_award_id":"1845969","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410140555.pdf","grobid_xml":"https://content.openalex.org/works/W4410140555.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2561715562","https://openalex.org/W2956371155","https://openalex.org/W2963201472","https://openalex.org/W2968088687","https://openalex.org/W3088310808","https://openalex.org/W3120778962","https://openalex.org/W3216772467","https://openalex.org/W4290945397","https://openalex.org/W4319317403","https://openalex.org/W4379474731","https://openalex.org/W4393972732","https://openalex.org/W4396982312","https://openalex.org/W4401417048","https://openalex.org/W4402716288","https://openalex.org/W4404612908","https://openalex.org/W4405362916"],"related_works":["https://openalex.org/W2124759987","https://openalex.org/W1999714552","https://openalex.org/W2628861693","https://openalex.org/W4205762803","https://openalex.org/W2535856026","https://openalex.org/W2265065644","https://openalex.org/W2167086559","https://openalex.org/W2153102768","https://openalex.org/W1593007803","https://openalex.org/W3203087560"],"abstract_inverted_index":{"Vision-language":[0],"model":[1],"(VLM)":[2],"fine-tuning":[3,27,51,66,94],"for":[4,21,93],"application-specific":[5],"visual":[6],"grounding":[7],"based":[8],"on":[9,30],"natural":[10],"language":[11],"instructions":[12],"has":[13],"become":[14],"one":[15],"of":[16,99,107,136,177,222,228],"the":[17,79,100,105,108,114,124,127,144,156,169,175,178,212,218,226],"most":[18],"popular":[19],"approaches":[20],"learning-enabled":[22],"autonomous":[23],"systems.":[24],"However,":[25],"such":[26],"relies":[28],"heavily":[29],"high-quality":[31],"datasets":[32],"to":[33,47,63,82,89,95,110,113,117,160,173,195],"achieve":[34],"successful":[35],"performance":[36,106,162,221],"in":[37,158,233],"various":[38],"downstream":[39],"tasks.":[40,235],"Additionally,":[41],"VLMs":[42],"often":[43],"encounter":[44],"limitations":[45],"due":[46],"insufficient":[48],"and":[49,154,163,202],"imbalanced":[50],"data.":[52],"To":[53],"address":[54,96,164,174],"these":[55],"issues,":[56],"we":[57,103,180],"propose":[58],"a":[59,71,141,184],"new":[60],"generalizable":[61],"framework":[62,142],"improve":[64],"VLM":[65,109,125,157],"by":[67],"integrating":[68],"it":[69],"with":[70],"reinforcement":[72],"learning":[73],"(RL)":[74],"agent.":[75],"Our":[76,208],"method":[77],"utilizes":[78],"RL":[80,115,145],"agent":[81,116,146],"manipulate":[83],"objects":[84],"within":[85],"an":[86,149],"indoor":[87],"setting":[88],"create":[90],"synthetic":[91,191],"data":[92,120,151,170,192,214,231],"certain":[97],"vulnerabilities":[98],"VLM.":[101],"Specifically,":[102],"use":[104],"provide":[111],"feedback":[112],"generate":[118,203],"informative":[119,150],"that":[121,211],"efficiently":[122],"fine-tune":[123],"over":[126,199],"targeted":[128],"task":[129],"(e.g.":[130],"spatial":[131,219],"reasoning).":[132],"The":[133],"key":[134],"contribution":[135],"this":[137],"work":[138],"is":[139],"developing":[140],"where":[143],"serves":[147],"as":[148],"sampling":[152,171],"tool":[153],"assists":[155],"order":[159],"enhance":[161],"task-specific":[165],"vulnerabilities.":[166],"By":[167],"targeting":[168],"process":[172],"weaknesses":[176],"VLM,":[179],"can":[181],"effectively":[182],"train":[183],"more":[185],"context-aware":[186],"model.":[187],"In":[188],"addition,":[189],"generating":[190],"allows":[193],"us":[194],"have":[196],"precise":[197],"control":[198],"each":[200],"scene":[201],"granular":[204],"ground":[205],"truth":[206],"captions.":[207],"results":[209],"show":[210],"proposed":[213],"generation":[215,232],"approach":[216],"improves":[217],"reasoning":[220],"VLMs,":[223],"which":[224],"demonstrates":[225],"benefits":[227],"using":[229],"RL-guided":[230],"vision-language":[234]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
