{"id":"https://openalex.org/W7137814627","doi":"https://doi.org/10.1609/aaai.v40i14.38210","title":"End-to-End Multi-Person Pose Estimation with Pose-Aware Video Transformer","display_name":"End-to-End Multi-Person Pose Estimation with Pose-Aware Video Transformer","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137814627","doi":"https://doi.org/10.1609/aaai.v40i14.38210"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38210","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38210","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38210/42172","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38210/42172","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121722167","display_name":"Yonghui Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yonghui Yu","raw_affiliation_strings":["Zhejiang Gongshang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048955262","display_name":"Jiahang CAI","orcid":null},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahang Cai","raw_affiliation_strings":["Zhejiang Gongshang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129706827","display_name":"Xun Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Wang","raw_affiliation_strings":["Zhejiang Gongshang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129674467","display_name":"Wenwu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenwu Yang","raw_affiliation_strings":["Zhejiang Gongshang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University","institution_ids":["https://openalex.org/I75059550"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5121722167"],"corresponding_institution_ids":["https://openalex.org/I75059550"],"apc_list":null,"apc_paid":null,"fwci":12.8349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.93633763,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"40","issue":"14","first_page":"12196","last_page":"12203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.009800000116229057,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.006300000008195639,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.7501999735832214},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6013000011444092},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5437999963760376},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4738999903202057},{"id":"https://openalex.org/keywords/3d-pose-estimation","display_name":"3D pose estimation","score":0.4154999852180481},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4099000096321106},{"id":"https://openalex.org/keywords/articulated-body-pose-estimation","display_name":"Articulated body pose estimation","score":0.3885999917984009}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817000150680542},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.7501999735832214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7315999865531921},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6013000011444092},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5906000137329102},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5437999963760376},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4738999903202057},{"id":"https://openalex.org/C36613465","wikidata":"https://www.wikidata.org/wiki/Q4636322","display_name":"3D pose estimation","level":3,"score":0.4154999852180481},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4099000096321106},{"id":"https://openalex.org/C22100474","wikidata":"https://www.wikidata.org/wiki/Q4800952","display_name":"Articulated body pose estimation","level":4,"score":0.3885999917984009},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3822999894618988},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38210","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38210","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38210/42172","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38210","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38210","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38210/42172","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137814627.pdf","grobid_xml":"https://content.openalex.org/works/W7137814627.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Existing":[0],"multi-person":[1,53],"video":[2],"pose":[3,22,55,102,124,145,162],"estimation":[4,56],"methods":[5],"typically":[6],"adopt":[7],"a":[8,48,83,92,100,117,176],"two-stage":[9,188],"pipeline:":[10],"detecting":[11],"individuals":[12,69],"in":[13,57,195],"each":[14,123],"frame,":[15],"followed":[16],"by":[17],"temporal":[18,76,113],"modeling":[19],"for":[20,52,158],"single-person":[21],"estimation.":[23,163],"This":[24],"design":[25],"relies":[26],"on":[27,180],"heuristic":[28,61],"operations":[29],"such":[30],"as":[31],"tracking,":[32],"RoI":[33],"cropping,":[34],"and":[35,41,74,99,182],"non-maximum":[36],"suppression,":[37],"limiting":[38],"both":[39],"accuracy":[40,184],"efficiency.":[42,196],"In":[43],"this":[44],"paper,":[45],"we":[46,81,115,139],"present":[47],"fully":[49],"end-to-end":[50,156,173],"framework":[51],"2D":[54,160],"videos,":[58],"effectively":[59],"eliminating":[60],"operations.":[62],"A":[63],"key":[64],"challenge":[65],"is":[66,153],"to":[67,95,104,126,131,147],"associate":[68],"across":[70,108,135],"frames":[71],"under":[72],"complex":[73],"overlapping":[75],"trajectories.":[77],"To":[78,110],"address":[79],"this,":[80],"introduce":[82],"novel":[84],"Pose-Aware":[85],"Video":[86],"transformEr":[87],"Network":[88],"(PAVE-Net),":[89],"which":[90],"features":[91,129],"spatial":[93],"encoder":[94],"model":[96,141],"intra-frame":[97],"relations":[98],"spatiotemporal":[101,142],"decoder":[103],"capture":[105],"global":[106],"dependencies":[107,143],"frames.":[109,137],"achieve":[111],"accurate":[112],"association,":[114],"propose":[116],"pose-aware":[118],"attention":[119],"mechanism":[120],"that":[121,167],"enables":[122],"query":[125],"selectively":[127],"aggregate":[128],"corresponding":[130],"the":[132,154],"same":[133],"individual":[134],"consecutive":[136],"Additionally,":[138],"explicitly":[140],"among":[144],"keypoints":[146],"improve":[148],"accuracy.":[149],"Notably,":[150],"our":[151],"approach":[152],"first":[155],"method":[157],"multi-frame":[159],"human":[161],"Extensive":[164],"experiments":[165],"show":[166],"PAVE-Net":[168],"substantially":[169],"outperforms":[170],"prior":[171],"image-based":[172],"methods,":[174],"achieving":[175],"6.0":[177],"mAP":[178],"improvement":[179],"PoseTrack2017,":[181],"delivers":[183],"competitive":[185],"with":[186],"state-of-the-art":[187],"video-based":[189],"approaches,":[190],"while":[191],"offering":[192],"significant":[193],"gains":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
