{"id":"https://openalex.org/W7131062311","doi":"https://doi.org/10.1109/iccvw69036.2025.00719","title":"Few-Shot Vision-Language Reasoning for Satellite Imagery via Verifiable Rewards","display_name":"Few-Shot Vision-Language Reasoning for Satellite Imagery via Verifiable Rewards","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131062311","doi":"https://doi.org/10.1109/iccvw69036.2025.00719"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126657094","display_name":"Aybora K\u00f6ksal","orcid":null},"institutions":[{"id":"https://openalex.org/I201799495","display_name":"Middle East Technical University","ror":"https://ror.org/014weej12","country_code":"TR","type":"education","lineage":["https://openalex.org/I201799495"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Aybora K\u00f6ksal","raw_affiliation_strings":["Middle East Technical University (METU),Center for Image Analysis (OGAM),Dept. of Electrical and Electronics Engineering,Ankara,Turkey"],"affiliations":[{"raw_affiliation_string":"Middle East Technical University (METU),Center for Image Analysis (OGAM),Dept. of Electrical and Electronics Engineering,Ankara,Turkey","institution_ids":["https://openalex.org/I201799495"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126592528","display_name":"A. Aydin Alatan","orcid":null},"institutions":[{"id":"https://openalex.org/I201799495","display_name":"Middle East Technical University","ror":"https://ror.org/014weej12","country_code":"TR","type":"education","lineage":["https://openalex.org/I201799495"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"A. Aydin Alatan","raw_affiliation_strings":["Middle East Technical University (METU),Center for Image Analysis (OGAM),Dept. of Electrical and Electronics Engineering,Ankara,Turkey"],"affiliations":[{"raw_affiliation_string":"Middle East Technical University (METU),Center for Image Analysis (OGAM),Dept. of Electrical and Electronics Engineering,Ankara,Turkey","institution_ids":["https://openalex.org/I201799495"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5126657094"],"corresponding_institution_ids":["https://openalex.org/I201799495"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74566432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6960","last_page":"6969"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.44269999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.44269999861717224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.16210000216960907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.597599983215332},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.510699987411499},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.5005000233650208},{"id":"https://openalex.org/keywords/satellite-imagery","display_name":"Satellite imagery","score":0.4848000109195709},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4700999855995178},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.45989999175071716},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4032000005245209},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.38100001215934753}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7860000133514404},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.597599983215332},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5490999817848206},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.510699987411499},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.5005000233650208},{"id":"https://openalex.org/C2778102629","wikidata":"https://www.wikidata.org/wiki/Q725252","display_name":"Satellite imagery","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4700999855995178},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4699999988079071},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.45989999175071716},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3749000132083893},{"id":"https://openalex.org/C19269812","wikidata":"https://www.wikidata.org/wiki/Q26540","display_name":"Satellite","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2906999886035919},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C183365957","wikidata":"https://www.wikidata.org/wiki/Q17140402","display_name":"Remote sensing application","level":3,"score":0.274399995803833},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26089999079704285}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00719","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00719","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W4385245566","https://openalex.org/W4395041667","https://openalex.org/W4399399400","https://openalex.org/W4402713111","https://openalex.org/W4402776460","https://openalex.org/W4404536218","https://openalex.org/W4409367667","https://openalex.org/W4409383105"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2],"large":[3],"language":[4,66],"and":[5,28,101,145,156,163,170,196,204],"vision-language":[6,69,175],"models":[7,67,122],"have":[8],"enabled":[9],"strong":[10],"reasoning":[11,88,176],"capabilities,":[12],"yet":[13],"they":[14],"remain":[15],"impractical":[16],"for":[17,42,49,86,182],"specialized":[18],"domains":[19],"like":[20],"remote":[21,94],"sensing,":[22],"where":[23],"annotated":[24,127],"data":[25],"is":[26],"scarce":[27],"expensive.":[29],"We":[30],"present":[31],"the":[32,47,61,112,130],"first":[33],"few-shot":[34],"reinforcement":[35],"learning":[36],"with":[37,75],"veri-fiable":[38],"reward":[39],"(RLVR)":[40],"framework":[41],"satellite":[43,87],"imagery":[44],"that":[45,103,153],"eliminates":[46],"need":[48],"caption":[50],"supervision-relying":[51],"solely":[52],"on":[53,124],"lightweight,":[54],"rule-based":[55],"binary":[56],"or":[57,120],"IoU-based":[58],"rewards.":[59],"Adapting":[60],"\u201c1-shot":[62],"RLVR\u201d":[63],"paradigm":[64],"from":[65,186],"to":[68,82,116],"models,":[70,177],"we":[71,151],"employ":[72],"policy-gradient":[73],"optimization":[74],"as":[76,78],"few":[77],"one":[79],"curated":[80],"example":[81,107],"align":[83],"model":[84],"outputs":[85],"tasks.":[89,149],"Comprehensive":[90],"experiments":[91],"across":[92,147],"multiple":[93],"sensing":[95],"benchmarks-including":[96],"classification,":[97],"visual":[98],"question":[99],"answering,":[100],"grounding-show":[102],"even":[104],"a":[105,179,187,191],"single":[106],"yields":[108],"substantial":[109],"improvements":[110],"over":[111],"base":[113],"model.":[114],"Scaling":[115],"128":[117],"examples":[118],"matches":[119],"exceeds":[121],"trained":[123],"thousands":[125],"of":[126,173,193],"samples.":[128],"While":[129],"extreme":[131],"one-shot":[132],"setting":[133],"can":[134],"induce":[135],"mild,":[136],"task-specific":[137],"overfitting,":[138],"our":[139],"approach":[140],"consistently":[141],"demonstrates":[142],"robust":[143],"generalization":[144],"efficiency":[146],"diverse":[148],"Further,":[150],"find":[152],"prompt":[154],"design":[155],"loss":[157],"weighting":[158],"significantly":[159],"influence":[160],"training":[161,202],"stability":[162],"final":[164],"accuracy.":[165],"Our":[166,200],"method":[167],"enables":[168],"cost-effective":[169],"data-efficient":[171],"development":[172],"domain-specialist":[174],"offering":[178],"pragmatic":[180],"recipe":[181],"data-scarce":[183],"fields:":[184],"start":[185],"compact":[188],"VLM,":[189],"curate":[190],"handful":[192],"reward-checkable":[194],"cases,":[195],"train":[197],"via":[198],"RLVR.":[199],"model,":[201],"code":[203],"dataset":[205],"will":[206],"be":[207],"at":[208],"https://github.com/aybora/FewShotReasoning.":[209]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
