{"id":"https://openalex.org/W7138059873","doi":"https://doi.org/10.1609/aaai.v40i13.38049","title":"Dual Coding Theory in Action: Language-Assisted Human Pose Estimation in Videos","display_name":"Dual Coding Theory in Action: Language-Assisted Human Pose Estimation in Videos","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138059873","doi":"https://doi.org/10.1609/aaai.v40i13.38049"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i13.38049","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38049","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38049/42011","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38049/42011","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129722807","display_name":"Sifan Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sifan Wu","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University\nKey Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University\nKey Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129693808","display_name":"Haipeng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haipeng Chen","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University\nKey Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University\nKey Laboratory of Symbolic Computation and Knowledge Engineering of Ministry of Education, Jilin University","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129720155","display_name":"Yingda Lyu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]},{"id":"https://openalex.org/I4210141933","display_name":"Jilin Agricultural Science and Technology University","ror":"https://ror.org/04w5zb891","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210141933"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingda Lyu","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University\nPublic Computer Education and Research Center, Jilin University"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University\nPublic Computer Education and Research Center, Jilin University","institution_ids":["https://openalex.org/I4210141933","https://openalex.org/I4210134929"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129739431","display_name":"Shaojing Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shaojing Fan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129682684","display_name":"Zhigang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhigang Wang","raw_affiliation_strings":["The State Key Laboratory of Blockchain and Data Security, Zhejiang University"],"affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129728194","display_name":"Zhenguang Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I1302611135","display_name":"Ministry of Public Security of the People's Republic of China","ror":"https://ror.org/00bt9we26","country_code":"CN","type":"government","lineage":["https://openalex.org/I1302611135"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenguang Liu","raw_affiliation_strings":["The State Key Laboratory of Blockchain and Data Security, Zhejiang University\nShandong Rendui Network Co., Ltd.\nHangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security"],"affiliations":[{"raw_affiliation_string":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University\nShandong Rendui Network Co., Ltd.\nHangzhou High-Tech Zone (Binjiang) Institute of Blockchain and Data Security","institution_ids":["https://openalex.org/I1302611135"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068056273","display_name":"Yingying Jiao","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingying Jiao","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University of Technology"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University of Technology","institution_ids":["https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129722807"],"corresponding_institution_ids":["https://openalex.org/I194450716"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35342262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"13","first_page":"10745","last_page":"10753"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.832099974155426,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.832099974155426,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09529999643564224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.021400000900030136,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5608999729156494},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.5498999953269958},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5145999789237976},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5013999938964844},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.4332999885082245},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.4235999882221222},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.39320001006126404},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.38260000944137573},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3709999918937683},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.3560999929904938}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7555000185966492},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6407999992370605},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5608999729156494},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.5498999953269958},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5145999789237976},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5013999938964844},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.4332999885082245},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.427700012922287},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.4235999882221222},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.39320001006126404},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3546999990940094},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.34139999747276306},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3253999948501587},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C200220432","wikidata":"https://www.wikidata.org/wiki/Q7936208","display_name":"Vision science","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.2996000051498413},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27730000019073486},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.2732999920845032},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2637999951839447},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2574000060558319},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i13.38049","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38049","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38049/42011","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i13.38049","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38049","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38049/42011","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3657713562","display_name":null,"funder_award_id":"62372402","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6058138561","display_name":null,"funder_award_id":", No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8209437284","display_name":null,"funder_award_id":"62276112","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138059873.pdf","grobid_xml":"https://content.openalex.org/works/W7138059873.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video-based":[0],"human":[1,14,54,137],"pose":[2,113,185],"estimation":[3],"aims":[4],"to":[5,68,110,132],"localize":[6],"keypoints":[7],"across":[8],"frames,":[9],"enabling":[10],"robust":[11,184],"analysis":[12],"of":[13,73,119,136],"motion":[15,40,195],"in":[16,35,188],"applications":[17],"such":[18],"as":[19,106],"sports,":[20],"surveillance,":[21],"and":[22,90,162,183,194],"healthcare.":[23],"However,":[24],"existing":[25],"methods":[26],"rely":[27],"solely":[28],"on":[29,199],"visual":[30,63,160],"cues,":[31],"limiting":[32],"their":[33],"robustness":[34],"complex":[36],"scenes":[37,191],"involving":[38],"occlusion,":[39],"blur,":[41],"or":[42],"poor":[43],"lighting.":[44],"In":[45],"contrast,":[46],"dual":[47],"coding":[48],"theory":[49],"from":[50,139],"psychology":[51],"suggests":[52],"that":[53,87,102,203],"cognition":[55],"is":[56],"inherently":[57],"multimodal:":[58],"we":[59,97,123],"learn":[60],"by":[61,93],"integrating":[62],"perception":[64],"with":[65],"linguistic":[66],"context":[67],"form":[69],"structured,":[70],"semantic":[71,152],"understandings":[72],"the":[74,99,117],"world.":[75],"Visual":[76],"input":[77],"provides":[78],"concrete":[79],"spatiotemporal":[80,159],"grounding,":[81],"while":[82,165],"language":[83,105],"offers":[84],"symbolic":[85],"abstraction":[86],"enhances":[88],"reasoning":[89],"generalization.":[91],"Motivated":[92],"this":[94],"cognitive":[95],"principle,":[96],"present":[98],"first":[100,124],"framework":[101],"explicitly":[103],"incorporates":[104],"an":[107,166],"auxiliary":[108],"modality":[109],"enhance":[111],"video-based":[112],"estimation.":[114],"To":[115],"address":[116],"lack":[118],"paired":[120],"video-text":[121],"datasets,":[122],"employ":[125],"a":[126,144,150],"Multimodal":[127],"Large":[128],"Language":[129],"Model":[130],"(MLLM)":[131],"generate":[133],"textual":[134,163],"descriptions":[135],"interactions":[138],"videos.":[140],"We":[141],"then":[142],"propose":[143],"novel":[145],"coarse-to-fine":[146],"multimodal":[147],"alignment":[148],"pipeline:":[149],"cross-modal":[151],"interaction":[153],"module":[154],"establishes":[155],"initial":[156],"grounding":[157],"between":[158],"features":[161],"embeddings,":[164],"optimal":[167],"transport-based":[168],"feature":[169],"matching":[170],"mechanism":[171],"enforces":[172],"fine-grained,":[173],"geometry-aware":[174],"alignment.":[175],"This":[176],"cognitively":[177],"inspired":[178],"design":[179],"enables":[180],"more":[181],"accurate":[182],"estimation,":[186],"especially":[187],"visually":[189],"challenging":[190],"like":[192],"occlusion":[193],"blur.":[196],"Extensive":[197],"experiments":[198],"three":[200],"benchmarks":[201],"confirm":[202],"our":[204],"method":[205],"consistently":[206],"outperforms":[207],"state-of-the-art":[208],"approaches.":[209]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
