{"id":"https://openalex.org/W7137975686","doi":"https://doi.org/10.48550/arxiv.2603.13295","title":"ICPRL: Acquiring Physical Intuition from Interactive Control","display_name":"ICPRL: Acquiring Physical Intuition from Interactive Control","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7137975686","doi":"https://doi.org/10.48550/arxiv.2603.13295"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13295","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13295","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129749102","display_name":"Xinrun Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Xinrun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129648717","display_name":"Pi Bu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bu, Pi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687308","display_name":"Ye Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015011965","display_name":"B\u00f6rje F. Karlsson","orcid":"https://orcid.org/0000-0001-8925-360X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karlsson, B\u00f6rje F.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722463","display_name":"Ziming Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ziming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059065177","display_name":"Tengtao Song","orcid":"https://orcid.org/0009-0000-7947-3473"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Tengtao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722128","display_name":"Qi Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653548","display_name":"Jun Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129666828","display_name":"Shuo Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129719938","display_name":"Zhiming Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Zhiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129715522","display_name":"Bo Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Bo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5129749102"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3449000120162964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3449000120162964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2890999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.06909999996423721,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6561999917030334},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.6538000106811523},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5602999925613403},{"id":"https://openalex.org/keywords/physical-system","display_name":"Physical system","score":0.398499995470047},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.3637999892234802},{"id":"https://openalex.org/keywords/dreyfus-model-of-skill-acquisition","display_name":"Dreyfus model of skill acquisition","score":0.3531000018119812}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6837999820709229},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6561999917030334},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.6538000106811523},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5602999925613403},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4763000011444092},{"id":"https://openalex.org/C116672817","wikidata":"https://www.wikidata.org/wiki/Q1454986","display_name":"Physical system","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3953000009059906},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C132758656","wikidata":"https://www.wikidata.org/wiki/Q5307365","display_name":"Dreyfus model of skill acquisition","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30149999260902405},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13295","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13295","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13295","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"VLMs":[0,65],"excel":[1],"at":[2],"static":[3],"perception":[4],"but":[5],"falter":[6],"in":[7,10,45,116,168,190],"interactive":[8,208],"reasoning":[9,24,128],"dynamic":[11,20],"physical":[12,23,68,127,192,205],"environments,":[13,193],"which":[14],"demands":[15],"planning":[16],"and":[17,38,70,181],"adaptation":[18],"to":[19,36,66,98,149,155],"outcomes.":[21],"Existing":[22],"methods":[25],"often":[26],"depend":[27],"on":[28,103,162],"abstract":[29],"symbolic":[30],"inputs":[31],"or":[32],"lack":[33],"the":[34,96,131,138,144,157,163,169,203],"ability":[35],"learn":[37],"adapt":[39,71,99],"from":[40,207],"direct,":[41],"pixel-based":[42],"visual":[43],"interaction":[44,92],"novel":[46],"scenarios.":[47],"We":[48],"introduce":[49],"ICPRL":[50,172],"(In-Context":[51],"Physical":[52],"Reinforcement":[53,60],"Learning),":[54],"a":[55,78,119,151],"framework":[56,197],"inspired":[57],"by":[58,101,129],"In-Context":[59],"Learning":[61],"(ICRL)":[62],"that":[63,124,195],"empowers":[64],"acquire":[67],"intuition":[69],"their":[72],"policies":[73],"in-context.":[74],"Our":[75],"approach":[76],"trains":[77],"vision-grounded":[79],"policy":[80,114,139],"model":[81,123,146],"via":[82],"multi-turn":[83],"Group":[84],"Relative":[85],"Policy":[86],"Optimization":[87],"(GRPO)":[88],"over":[89],"diverse":[90,164],"multi-episode":[91],"histories.":[93],"This":[94,112],"enables":[95],"agent":[97],"strategies":[100],"conditioning":[102],"past":[104],"trial-and-error":[105],"sequences,":[106],"without":[107],"requiring":[108],"any":[109],"weight":[110],"updates.":[111],"adaptive":[113],"works":[115],"concert":[117],"with":[118],"separately":[120],"trained":[121],"world":[122,145],"provides":[125],"explicit":[126],"predicting":[130],"results":[132],"of":[133,202],"potential":[134],"actions.":[135],"At":[136],"inference,":[137],"proposes":[140],"candidate":[141],"actions,":[142],"while":[143],"predicts":[147],"outcomes":[148],"guide":[150],"root-node":[152],"PUCT":[153],"search":[154],"select":[156],"most":[158],"promising":[159],"action.":[160],"Evaluated":[161],"physics-based":[165],"puzzle-solving":[166],"tasks":[167],"DeepPHY":[170],"benchmark,":[171],"demonstrates":[173],"significant":[174],"improvements":[175],"across":[176],"both":[177],"its":[178],"I.":[179],"policy-only,":[180],"II.":[182],"world-model-augmented":[183],"stages.":[184],"Notably,":[185],"these":[186],"gains":[187],"are":[188],"retained":[189],"unseen":[191],"demonstrating":[194],"our":[196],"facilitates":[198],"genuine":[199],"in-context":[200],"acquisition":[201],"environment's":[204],"dynamics":[206],"experience.":[209]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
