{"id":"https://openalex.org/W4416750874","doi":"https://doi.org/10.1109/iros60139.2025.11245886","title":"AlignCAPE: Support and Query Feature Aligning for Category-Agnostic Pose Estimation","display_name":"AlignCAPE: Support and Query Feature Aligning for Category-Agnostic Pose Estimation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416750874","doi":"https://doi.org/10.1109/iros60139.2025.11245886"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11245886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018741960","display_name":"Zhuoran Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhuoran Chen","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033216492","display_name":"Jin Tang","orcid":"https://orcid.org/0000-0001-9065-4021"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Tang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088625346","display_name":"Guoliang Xu","orcid":"https://orcid.org/0009-0000-7547-5308"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Xu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768906","display_name":"Shaojie Zhang","orcid":"https://orcid.org/0000-0001-8168-7112"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaojie Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100456093","display_name":"Zhicheng Zhang","orcid":"https://orcid.org/0000-0003-4241-0588"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhicheng Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102080920","display_name":"Jianqin Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqin Yin","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Intelligent Engineering and Automation,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5018741960"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45064078,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"20669","last_page":"20676"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6585000157356262,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6585000157356262,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.09300000220537186,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07320000231266022,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7163000106811523},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6733999848365784},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.649399995803833},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.5665000081062317},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.45879998803138733},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4528999924659729},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.42309999465942383},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4083999991416931}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7705000042915344},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7163000106811523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6852999925613403},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6733999848365784},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.649399995803833},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.5665000081062317},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4675000011920929},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.42309999465942383},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4083999991416931},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11245886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2108598243","https://openalex.org/W2113325037","https://openalex.org/W2194775991","https://openalex.org/W2307770531","https://openalex.org/W2887114371","https://openalex.org/W2916798096","https://openalex.org/W2956121407","https://openalex.org/W2963402313","https://openalex.org/W2963474899","https://openalex.org/W2963488642","https://openalex.org/W2963789946","https://openalex.org/W2964105113","https://openalex.org/W2964105864","https://openalex.org/W2964304707","https://openalex.org/W3034399482","https://openalex.org/W3096609285","https://openalex.org/W3203925315","https://openalex.org/W3205249428","https://openalex.org/W4229890965","https://openalex.org/W4312914534","https://openalex.org/W4313036259","https://openalex.org/W4385245566","https://openalex.org/W4386076411","https://openalex.org/W4390871854","https://openalex.org/W4402753468","https://openalex.org/W4404545169"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,101,161,178,182],"category-agnostic":[3],"pose":[4],"estimation":[5],"have":[6],"focused":[7],"on":[8,39,166],"developing":[9],"a":[10,79,95],"unified":[11],"model":[12,90,177],"capable":[13],"of":[14,45,54,144],"localizing":[15],"keypoint":[16],"coordinates":[17],"across":[18],"arbitrary":[19],"categories,":[20],"which":[21],"enables":[22],"robots":[23],"to":[24,58,83,109,129,151,158],"accurately":[25],"interact":[26],"with":[27],"diverse":[28],"objects":[29],"by":[30,106,180],"understanding":[31],"their":[32],"poses.":[33],"While":[34],"existing":[35],"methods":[36],"predominantly":[37],"concentrate":[38],"local":[40],"features":[41],"surrounding":[42],"the":[43,46,52,62,69,73,89,102,131,142,153,162,174],"keypoints":[44,160],"support":[47,63,132],"image,":[48],"they":[49],"often":[50],"overlook":[51],"importance":[53],"global":[55],"features,":[56],"leading":[57],"potential":[59],"misalignment":[60,86],"between":[61,72],"and":[64,87,120,135],"query":[65,136,163],"image.":[66,164],"To":[67],"address":[68],"inherent":[70],"conflicts":[71],"two":[74,115],"images,":[75],"we":[76,113],"propose":[77],"AlignCAPE,":[78],"novel":[80],"approach":[81],"designed":[82],"mitigate":[84],"such":[85],"enhance":[88],"performance.":[91],"Our":[92],"method":[93,172],"formulates":[94],"two-stage":[96],"pipeline,":[97],"generating":[98],"initial":[99],"proposals":[100],"first":[103],"stage,":[104],"followed":[105],"another":[107],"stage":[108],"refine":[110],"iteratively.":[111],"Specifically,":[112],"introduce":[114],"modules,":[116],"Feature":[117],"Alignment":[118],"Module(FAM)":[119],"Keypoint":[121],"Perception":[122],"Module(KPM).":[123],"FAM":[124],"utilizes":[125],"bidirectional":[126],"cross-attention":[127],"operation":[128],"align":[130],"image":[133,137],"feature":[134],"feature,":[138],"thereby":[139],"compensating":[140],"for":[141],"limitations":[143],"previous":[145],"methods.":[146],"KPM":[147],"employs":[148],"self-attention":[149],"mechanism":[150],"capture":[152],"interactions":[154],"among":[155],"keypoints,":[156],"facilitating":[157],"localize":[159],"Experiments":[165],"MP-100":[167],"benchmark":[168],"demonstrate":[169],"that":[170],"our":[171],"outperforms":[173],"widely-used":[175],"baseline":[176],"CAPE":[179],"0.68%":[181],"PCK@0.2":[183],"metric":[184],"under":[185],"1-shot":[186],"setting.":[187]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
