{"id":"https://openalex.org/W4405254486","doi":"https://doi.org/10.1109/iccv51701.2025.01250","title":"CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction","display_name":"CARP: Visuomotor Policy Learning via Coarse-to-Fine Autoregressive Prediction","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4405254486","doi":"https://doi.org/10.1109/iccv51701.2025.01250"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01250","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01250","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.06782","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhefei Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhefei Gong","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060665856","display_name":"Pengxiang Ding","orcid":"https://orcid.org/0000-0002-4049-7467"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengxiang Ding","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075248381","display_name":"Shangke Lyu","orcid":"https://orcid.org/0000-0002-8302-6630"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shangke Lyu","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043876209","display_name":"Siteng Huang","orcid":"https://orcid.org/0000-0002-9735-1186"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siteng Huang","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079378336","display_name":"Mingyang Sun","orcid":"https://orcid.org/0000-0002-5790-5025"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyang Sun","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009949085","display_name":"Wei Zhao","orcid":"https://orcid.org/0000-0002-3679-6165"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhao","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021141988","display_name":"Zhaoxin Fan","orcid":"https://orcid.org/0000-0002-6324-1712"},"institutions":[{"id":"https://openalex.org/I4210165198","display_name":"Beijing Advanced Sciences and Innovation Center","ror":"https://ror.org/05qm21180","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165198"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxin Fan","raw_affiliation_strings":["Beijing Advanced Innovation Center for Future Blockchain and Privacy Computing"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Advanced Innovation Center for Future Blockchain and Privacy Computing","institution_ids":["https://openalex.org/I4210165198"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100665183","display_name":"Donglin Wang","orcid":"https://orcid.org/0000-0003-1359-6440"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Donglin Wang","raw_affiliation_strings":["Westlake University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Westlake University","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I3133055985"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00084758,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13460","last_page":"13470"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9681000113487244,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11184","display_name":"Neonatal and fetal brain pathology","score":0.948199987411499,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8381994366645813},{"id":"https://openalex.org/keywords/carp","display_name":"Carp","score":0.6818889379501343},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.4217560291290283},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39355936646461487},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3911765217781067},{"id":"https://openalex.org/keywords/environmental-science","display_name":"Environmental science","score":0.34156298637390137},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3284137547016144},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.32537946105003357},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27857446670532227},{"id":"https://openalex.org/keywords/fishery","display_name":"Fishery","score":0.21800321340560913},{"id":"https://openalex.org/keywords/fish-actinopterygii","display_name":"Fish <Actinopterygii>","score":0.15488800406455994},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14506232738494873}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8381994366645813},{"id":"https://openalex.org/C2778891732","wikidata":"https://www.wikidata.org/wiki/Q2751223","display_name":"Carp","level":3,"score":0.6818889379501343},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.4217560291290283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39355936646461487},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3911765217781067},{"id":"https://openalex.org/C39432304","wikidata":"https://www.wikidata.org/wiki/Q188847","display_name":"Environmental science","level":0,"score":0.34156298637390137},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3284137547016144},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.32537946105003357},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27857446670532227},{"id":"https://openalex.org/C505870484","wikidata":"https://www.wikidata.org/wiki/Q180538","display_name":"Fishery","level":1,"score":0.21800321340560913},{"id":"https://openalex.org/C2909208804","wikidata":"https://www.wikidata.org/wiki/Q127282","display_name":"Fish <Actinopterygii>","level":2,"score":0.15488800406455994},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14506232738494873}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01250","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01250","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.06782","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.06782","pdf_url":"https://arxiv.org/pdf/2412.06782","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.06782","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.06782","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.06782","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.06782","pdf_url":"https://arxiv.org/pdf/2412.06782","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2171218219","https://openalex.org/W2039413894","https://openalex.org/W2019067380","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W4327525404","https://openalex.org/W4287185323","https://openalex.org/W3150905897","https://openalex.org/W1520183331","https://openalex.org/W2156628102"],"abstract_inverted_index":{"In":[0,40],"robotic":[1,182],"visuomotor":[2,53],"policy":[3,54],"learning,":[4],"diffusion-based":[5,118],"models":[6],"have":[7],"achieved":[8],"significant":[9],"success":[10,154],"in":[11,181],"improving":[12],"the":[13,58,83,92,115],"accuracy":[14],"of":[15,82,117],"action":[16,60,70,77,85,179],"trajectory":[17],"generation":[18,61,71,180],"compared":[19,167],"to":[20,30,158,168],"traditional":[21],"autoregressive":[22,59,98,126],"models.":[23],"However,":[24],"they":[25],"suffer":[26],"from":[27,37],"inefficiency":[28],"due":[29],"multiple":[31],"denoising":[32],"steps":[33],"and":[34,102,108,137,142,162,175],"limited":[35],"flexibility":[36],"complex":[38],"constraints.":[39],"this":[41],"paper,":[42],"we":[43],"introduce":[44],"Coarse-to-Fine":[45],"AutoRegressive":[46],"Policy":[47],"(CARP),":[48],"a":[49,64,88,96,159,172],"novel":[50],"paradigm":[51,177],"for":[52,178],"learning":[55],"that":[56],"redefines":[57],"process":[62],"as":[63,146,148],"coarse-to-fine,":[65],"next-scale":[66],"approach.":[67],"CARP":[68,151],"decouples":[69],"into":[72],"two":[73],"stages:":[74],"first,":[75],"an":[76],"autoencoder":[78],"learns":[79],"multi-scale":[80],"representations":[81],"entire":[84],"sequence;":[86],"then,":[87],"GPT-style":[89],"transformer":[90],"refines":[91],"sequence":[93],"prediction":[94],"through":[95],"coarse-to-fine":[97],"process.":[99],"This":[100],"straightforward":[101],"intuitive":[103],"approach":[104],"produces":[105],"highly":[106],"accurate":[107],"smooth":[109],"actions,":[110],"matching":[111],"or":[112],"even":[113],"surpassing":[114],"performance":[116],"policies":[119],"while":[120],"maintaining":[121],"efficiency":[122],"on":[123,140],"par":[124],"with":[125,156],"policies.":[127],"We":[128],"conduct":[129],"extensive":[130],"evaluations":[131],"across":[132],"diverse":[133],"settings,":[134],"including":[135],"single-task":[136],"multi-task":[138],"scenarios":[139],"state-based":[141],"image-based":[143],"simulation":[144],"benchmarks,":[145],"well":[147],"real-world":[149],"tasks.":[150,183],"achieves":[152],"competitive":[153],"rates,":[155],"up":[157],"10%":[160],"improvement,":[161],"delivers":[163],"10x":[164],"faster":[165],"inference":[166],"state-of-the-art":[169],"policies,":[170],"establishing":[171],"high-performance,":[173],"efficient,":[174],"flexible":[176]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
