{"id":"https://openalex.org/W7134283392","doi":"https://doi.org/10.48550/arxiv.2603.05868","title":"AnyCamVLA: Zero-Shot Camera Adaptation for Viewpoint Robust Vision-Language-Action Models","display_name":"AnyCamVLA: Zero-Shot Camera Adaptation for Viewpoint Robust Vision-Language-Action Models","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134283392","doi":"https://doi.org/10.48550/arxiv.2603.05868"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05868","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000108506","display_name":"Hwa\u2010Young Heo","orcid":"https://orcid.org/0009-0009-2934-1064"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Heo, Hyeongjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128460015","display_name":"Seungyeon Woo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Woo, Seungyeon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128422228","display_name":"Sang Min Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sang Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128576748","display_name":"Junho Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Junho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128414726","display_name":"Junho Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Junho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128572739","display_name":"Yonghyeon Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Yonghyeon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128460971","display_name":"Young Duck Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Young Min","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5000108506"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.5322999954223633,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.5322999954223633,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.163100004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.048700001090765,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7580999732017517},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4875999987125397},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4481000006198883},{"id":"https://openalex.org/keywords/single-camera","display_name":"Single camera","score":0.42750000953674316},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.41440001130104065},{"id":"https://openalex.org/keywords/camera-auto-calibration","display_name":"Camera auto-calibration","score":0.4140999913215637},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.36910000443458557}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7580999732017517},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6951000094413757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6861000061035156},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6507999897003174},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4875999987125397},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4481000006198883},{"id":"https://openalex.org/C3018868555","wikidata":"https://www.wikidata.org/wiki/Q2918907","display_name":"Single camera","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.41440001130104065},{"id":"https://openalex.org/C94816000","wikidata":"https://www.wikidata.org/wiki/Q5026006","display_name":"Camera auto-calibration","level":3,"score":0.4140999913215637},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C161334170","wikidata":"https://www.wikidata.org/wiki/Q1428778","display_name":"Smart camera","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.3411000072956085},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.32190001010894775},{"id":"https://openalex.org/C2983761899","wikidata":"https://www.wikidata.org/wiki/Q604674","display_name":"Robot vision","level":4,"score":0.28700000047683716},{"id":"https://openalex.org/C110898773","wikidata":"https://www.wikidata.org/wiki/Q2933935","display_name":"Camera resectioning","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.26100000739097595}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05868","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05868","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05868","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05868","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"remarkable":[1],"progress":[2],"in":[3,19,35,73,149],"Vision-Language-Action":[4],"models":[5,13,24],"(VLAs)":[6],"for":[7,129,136],"robot":[8],"manipulation,":[9],"these":[10],"large":[11],"pre-trained":[12,103],"require":[14],"fine-tuning":[15,131],"to":[16,28,61,67,109],"be":[17],"deployed":[18],"specific":[20],"environments.":[21,37],"These":[22],"fine-tuned":[23],"are":[25],"highly":[26],"sensitive":[27],"camera":[29,45,65,71,158],"viewpoint":[30,147],"changes":[31],"that":[32,125,142],"frequently":[33],"occur":[34],"unstructured":[36],"In":[38],"this":[39],"paper,":[40],"we":[41,77],"propose":[42],"a":[43,79],"zero-shot":[44],"adaptation":[46],"framework":[47],"without":[48],"additional":[49,133],"demonstration":[50],"data,":[51],"policy":[52,130],"fine-tuning,":[53],"or":[54,132],"architectural":[55],"modification.":[56],"Our":[57],"key":[58],"idea":[59],"is":[60],"virtually":[62],"adjust":[63],"test-time":[64],"observations":[66],"match":[68],"the":[69,102,117],"training":[70],"configuration":[72],"real-time.":[74],"For":[75],"that,":[76],"use":[78,126],"recent":[80],"feed-forward":[81],"novel":[82],"view":[83,90],"synthesis":[84],"model":[85],"which":[86],"outputs":[87],"high-quality":[88],"target":[89],"images,":[91],"handling":[92],"both":[93],"extrinsic":[94],"and":[95,107,161],"intrinsic":[96],"parameters.":[97],"This":[98],"plug-and-play":[99],"approach":[100,144],"preserves":[101],"capabilities":[104],"of":[105],"VLAs":[106],"applies":[108],"any":[110],"RGB-based":[111],"policy.":[112],"Through":[113],"extensive":[114],"experiments":[115],"on":[116],"LIBERO":[118],"benchmark,":[119],"our":[120,143],"method":[121],"consistently":[122],"outperforms":[123],"baselines":[124],"data":[127],"augmentation":[128],"3D-aware":[134],"features":[135],"visual":[137],"input.":[138],"We":[139],"further":[140],"validate":[141],"constantly":[145],"enhances":[146],"robustness":[148],"real-world":[150],"robotic":[151],"manipulation":[152],"scenarios,":[153],"including":[154],"settings":[155],"with":[156],"varying":[157],"extrinsics,":[159],"intrinsics,":[160],"freely":[162],"moving":[163],"handheld":[164],"cameras.":[165]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-10T00:00:00"}
