{"id":"https://openalex.org/W7154563029","doi":"https://doi.org/10.48550/arxiv.2604.13993","title":"Reward Design for Physical Reasoning in Vision-Language Models","display_name":"Reward Design for Physical Reasoning in Vision-Language Models","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154563029","doi":"https://doi.org/10.48550/arxiv.2604.13993"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.13993","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13993","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.13993","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133732697","display_name":"Derek Lilienthal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lilienthal, Derek","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133762293","display_name":"Manisha Mukherjee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mukherjee, Manisha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087089195","display_name":"Sameera Horawalavithana","orcid":"https://orcid.org/0000-0002-0327-3819"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Horawalavithana, Sameera","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5133732697"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9453999996185303,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9453999996185303,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.6969000101089478},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.620199978351593},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.6176000237464905},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4194999933242798},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.40209999680519104},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.350600004196167}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.6969000101089478},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.620199978351593},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.6176000237464905},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.599399983882904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5153999924659729},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4194999933242798},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.40209999680519104},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3953000009059906},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.31850001215934753},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.30649998784065247},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26440000534057617},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.13993","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13993","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.13993","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13993","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4129329025745392,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Physical":[0],"reasoning":[1,50,61,132,177,190,200],"over":[2,115],"visual":[3,9],"inputs":[4],"demands":[5],"tight":[6],"integration":[7],"of":[8,27,85],"perception,":[10],"domain":[11],"knowledge,":[12],"and":[13,41,102,105,130,136,164,215],"multi-step":[14],"symbolic":[15,205],"inference.":[16],"Yet":[17],"even":[18],"state-of-the-art":[19],"Vision":[20,142],"Language":[21],"Models":[22],"(VLMs)":[23],"fall":[24],"far":[25],"short":[26],"human":[28],"performance":[29,203],"on":[30,77,121,154],"physics":[31,99,128],"benchmarks.":[32],"While":[33],"post-training":[34],"algorithms":[35],"such":[36],"as":[37],"Supervised":[38],"Fine-Tuning":[39],"(SFT)":[40],"Group":[42],"Relative":[43],"Policy":[44],"Optimization":[45],"(GRPO)":[46],"have":[47],"demonstrated":[48],"strong":[49],"gains":[51,158],"in":[52,204],"language":[53],"models,":[54],"how":[55],"reward":[56,70,83,96,109,162,210],"design":[57,167],"shapes":[58],"VLM":[59,75],"physical":[60,78,240],"behavior":[62],"remains":[63],"poorly":[64],"understood.":[65],"We":[66,80,119],"present":[67],"a":[68,93,106,123,234],"systematic":[69],"ablation":[71],"study":[72],"for":[73,237],"GRPO-based":[74],"training":[76],"reasoning.":[79,241],"compare":[81],"four":[82],"signals":[84],"increasing":[86],"semantic":[87],"richness:":[88],"format":[89],"compliance,":[90],"answer":[91],"accuracy,":[92],"composite":[94],"rubric":[95],"(answer":[97],"correctness,":[98],"principle":[100],"identification,":[101],"unit":[103],"consistency),":[104],"novel":[107],"internal":[108,208],"derived":[110],"from":[111,220],"model":[112,229],"attention":[113],"weights":[114],"input":[116],"image":[117],"regions.":[118],"evaluate":[120],"PhyX,":[122],"3,000-problem":[124],"benchmark":[125],"spanning":[126],"six":[127,131],"domains":[129],"types":[133],"across":[134],"multiple-choice":[135],"open-ended":[137],"formats,":[138,147],"using":[139],"IBM":[140],"Granite":[141],"3.3":[143],"(2B).":[144],"Across":[145],"both":[146],"GRPO":[148],"with":[149],"accuracy-based":[150],"rewards":[151,180,187,197],"outperforms":[152],"SFT":[153],"most":[155],"domains,":[156],"though":[157],"vary":[159],"substantially":[160],"by":[161],"type":[163],"domain.":[165],"Reward":[166],"does":[168],"not":[169],"uniformly":[170],"improve":[171,188],"performance.":[172],"Instead,":[173],"it":[174],"induces":[175],"domain-specific":[176],"behaviors.":[178],"Accuracy-based":[179],"provide":[181],"the":[182,228],"strongest":[183],"overall":[184],"gains.":[185],"Rubric":[186],"structured":[189],"quality":[191],"without":[192],"consistent":[193],"accuracy":[194,219],"improvements.":[195],"Attention-based":[196],"enhance":[198],"spatial":[199,213,217],"while":[201],"degrading":[202],"domains.":[206],"Our":[207],"attention-weight":[209],"requires":[211],"no":[212],"annotations":[214],"improves":[216],"relation":[218],"0.27":[221],"to":[222],"0.50,":[223],"suggesting":[224],"that":[225],"supervising":[226],"where":[227],"attends":[230],"during":[231],"generation":[232],"is":[233],"promising":[235],"direction":[236],"visually":[238],"grounded":[239]},"counts_by_year":[],"updated_date":"2026-04-17T06:04:52.305304","created_date":"2026-04-17T00:00:00"}
