{"id":"https://openalex.org/W7164814916","doi":"https://doi.org/10.1145/3805622.3810764","title":"Egocentric Action Recognition with Retrieval-Augmented Learning","display_name":"Egocentric Action Recognition with Retrieval-Augmented Learning","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164814916","doi":"https://doi.org/10.1145/3805622.3810764"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810764","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810764","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044881779","display_name":"Yishan Zou","orcid":"https://orcid.org/0009-0006-8450-0388"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yishan Zou","raw_affiliation_strings":["Ulster University, Belfast, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0006-8450-0388","affiliations":[{"raw_affiliation_string":"Ulster University, Belfast, United Kingdom","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021247547","display_name":"Chris Nugent","orcid":"https://orcid.org/0000-0003-0882-7902"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chris Nugent","raw_affiliation_strings":["Ulster University, Belfast, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-0882-7902","affiliations":[{"raw_affiliation_string":"Ulster University, Belfast, United Kingdom","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069915268","display_name":"Matthew Burns","orcid":"https://orcid.org/0000-0001-8931-2454"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Matthew Burns","raw_affiliation_strings":["Ulster University, Belfast, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-8931-2454","affiliations":[{"raw_affiliation_string":"Ulster University, Belfast, United Kingdom","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072246520","display_name":"Shengli Wu","orcid":"https://orcid.org/0000-0003-2008-1736"},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shengli Wu","raw_affiliation_strings":["Ulster University, Belfast, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-2008-1736","affiliations":[{"raw_affiliation_string":"Ulster University, Belfast, United Kingdom","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040666380","display_name":"Mingzhu Xu","orcid":"https://orcid.org/0000-0002-1492-0970"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingzhu Xu","raw_affiliation_strings":["Shandong University, Jinan, China"],"raw_orcid":"https://orcid.org/0000-0002-1492-0970","affiliations":[{"raw_affiliation_string":"Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100457438","display_name":"Meng Liu","orcid":"https://orcid.org/0000-0002-1582-5764"},"institutions":[{"id":"https://openalex.org/I44445938","display_name":"Shandong Jianzhu University","ror":"https://ror.org/01gbfax37","country_code":"CN","type":"education","lineage":["https://openalex.org/I44445938"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Liu","raw_affiliation_strings":["Shandong Jianzhu Universiry, Jinan, China"],"raw_orcid":"https://orcid.org/0000-0002-1582-5764","affiliations":[{"raw_affiliation_string":"Shandong Jianzhu Universiry, Jinan, China","institution_ids":["https://openalex.org/I44445938"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93466624,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"681","last_page":"689"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11431","display_name":"Action Observation and Synchronization","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.6714000105857849},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6678000092506409},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6155999898910522},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5544999837875366},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5397999882698059},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.48969998955726624},{"id":"https://openalex.org/keywords/endocentric-and-exocentric","display_name":"Endocentric and exocentric","score":0.48159998655319214},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.46470001339912415}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7684000134468079},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.6714000105857849},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6678000092506409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6388999819755554},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6155999898910522},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5544999837875366},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5397999882698059},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.48969998955726624},{"id":"https://openalex.org/C131042201","wikidata":"https://www.wikidata.org/wiki/Q493198","display_name":"Endocentric and exocentric","level":4,"score":0.48159998655319214},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.46470001339912415},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.42160001397132874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.414000004529953},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.37709999084472656},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3287000060081482},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C121687571","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Activity recognition","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2971999943256378},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C2780103172","wikidata":"https://www.wikidata.org/wiki/Q1309721","display_name":"Visual Objects","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2549999952316284},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2547999918460846},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810764","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810764","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810764","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1947050545","https://openalex.org/W2798354744","https://openalex.org/W2895299763","https://openalex.org/W2897628926","https://openalex.org/W2952132648","https://openalex.org/W2962933664","https://openalex.org/W2963351448","https://openalex.org/W3109241881","https://openalex.org/W3152619510","https://openalex.org/W3199693760","https://openalex.org/W4205817612","https://openalex.org/W4303444943","https://openalex.org/W4312772544","https://openalex.org/W4312920106","https://openalex.org/W4313071313","https://openalex.org/W4385573236","https://openalex.org/W4386076004","https://openalex.org/W4386076314","https://openalex.org/W4390871800","https://openalex.org/W4390871901","https://openalex.org/W4390872374","https://openalex.org/W4390873422","https://openalex.org/W4390873954","https://openalex.org/W4394625525","https://openalex.org/W4394625615","https://openalex.org/W4402727889","https://openalex.org/W4402754020","https://openalex.org/W4403561465","https://openalex.org/W4406526575","https://openalex.org/W4408353384","https://openalex.org/W4409261716","https://openalex.org/W4412375108","https://openalex.org/W4413147761","https://openalex.org/W4414243647","https://openalex.org/W7133209102","https://openalex.org/W7133211551"],"related_works":[],"abstract_inverted_index":{"Egocentric":[0],"Action":[1],"Recognition":[2],"(EAR)":[3],"aims":[4],"to":[5],"identify":[6],"fine-grained":[7],"actions":[8],"and":[9,35,94,112,137],"interacted":[10],"objects":[11],"from":[12],"first-person":[13],"videos,":[14],"forming":[15],"a":[16,46,66,86,101],"core":[17],"task":[18],"in":[19,134],"egocentric":[20,72],"video":[21],"understanding.":[22],"Despite":[23],"recent":[24],"progress,":[25],"EAR":[26,50],"remains":[27],"challenged":[28],"by":[29],"limited":[30],"data":[31],"scale,":[32],"annotation":[33],"quality,":[34],"long-tailed":[36],"class":[37,99,110],"distributions.":[38],"To":[39,97],"address":[40],"these":[41],"issues,":[42],"we":[43],"propose":[44],"REAR,":[45],"Retrieval-augmented":[47],"framework":[48],"for":[49],"that":[51,90,126],"leverages":[52],"external":[53],"third-person":[54],"(exocentric)":[55],"videos":[56],"as":[57],"auxiliary":[58],"knowledge\u2014without":[59],"requiring":[60],"synchronized":[61],"ego-exo":[62],"pairs.":[63],"REAR":[64,127],"adopts":[65],"dual-branch":[67],"architecture:":[68],"one":[69],"branch":[70],"extracts":[71],"representations,":[73],"while":[74],"the":[75],"other":[76],"retrieves":[77],"semantically":[78],"relevant":[79],"exocentric":[80],"features.":[81],"These":[82],"are":[83,115],"fused":[84],"via":[85],"cross-view":[87],"integration":[88],"module":[89],"performs":[91],"staged":[92],"refinement":[93],"attention-based":[95],"alignment.":[96],"mitigate":[98],"imbalance,":[100],"class-adaptive":[102],"selector":[103],"dynamically":[104],"adjusts":[105],"retrieval":[106],"depth":[107],"based":[108],"on":[109],"frequency,":[111],"independent":[113],"classifiers":[114],"trained":[116],"with":[117,131],"logit-adjusted":[118],"cross-entropy.":[119],"Extensive":[120],"experiments":[121],"across":[122],"three":[123],"benchmarks":[124],"demonstrate":[125],"achieves":[128],"state-of-the-art":[129],"performance,":[130],"significant":[132],"gains":[133],"object":[135],"recognition":[136],"tail-class":[138],"accuracy.":[139],"The":[140],"source":[141],"code":[142],"is":[143],"publicly":[144],"available":[145],"at":[146],"https://github.com/zou-y23/REAR.":[147]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
