{"id":"https://openalex.org/W7151480991","doi":"https://doi.org/10.48550/arxiv.2604.04467","title":"Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning","display_name":"Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151480991","doi":"https://doi.org/10.48550/arxiv.2604.04467"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04467","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04467","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04467","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133125432","display_name":"Ryuki Tezuka","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tezuka, Ryuki","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042267985","display_name":"Chihiro Nakatani","orcid":"https://orcid.org/0009-0000-5966-2672"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nakatani, Chihiro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133143493","display_name":"Norimichi Ukita","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ukita, Norimichi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5133125432"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.7767000198364258,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.7767000198364258,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03579999879002571,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.030799999833106995,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.607699990272522},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5745999813079834},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5576000213623047},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.5203999876976013},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.5016000270843506},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.48240000009536743},{"id":"https://openalex.org/keywords/pretext","display_name":"Pretext","score":0.4440999925136566},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.43479999899864197},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4011000096797943}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6711000204086304},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.647599995136261},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.607699990272522},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5745999813079834},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5576000213623047},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.5203999876976013},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.5016000270843506},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.48240000009536743},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4514000117778778},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.4440999925136566},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.43479999899864197},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3991999924182892},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.39320001006126404},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.33869999647140503},{"id":"https://openalex.org/C2983025851","wikidata":"https://www.wikidata.org/wiki/Q1040098","display_name":"Group learning","level":2,"score":0.3206000030040741},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3116999864578247},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3005000054836273},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2996000051498413},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2953000068664551},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.27799999713897705},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C2779542340","wikidata":"https://www.wikidata.org/wiki/Q1062461","display_name":"Learning object","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C138020889","wikidata":"https://www.wikidata.org/wiki/Q2349659","display_name":"Collaborative learning","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04467","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04467","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04467","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04467","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"Group":[3],"Activity":[4],"Feature":[5],"(GAF)":[6],"learning":[7,50],"without":[8],"group":[9,56,91,128],"activity":[10,129],"annotations.":[11],"Unlike":[12],"prior":[13],"work,":[14],"which":[15,84],"uses":[16],"low-level":[17],"static":[18],"local":[19,34,52,79],"features":[20,37],"to":[21,51,76,101],"learn":[22,102],"GAFs,":[23],"we":[24],"propose":[25],"leveraging":[26],"dynamics-aware":[27],"and":[28,35,48,54,65,110,131],"group-aware":[29],"pretext":[30,59],"tasks,":[31],"along":[32],"with":[33],"global":[36,55,113],"provided":[38],"by":[39],"DINO,":[40],"for":[41,89],"group-dynamics-aware":[42],"GAF":[43,49],"learning.":[44],"To":[45],"adapt":[46],"DINO":[47],"dynamics":[53],"features,":[57],"our":[58,125,143],"tasks":[60],"use":[61],"person":[62],"flow":[63,72],"estimation":[64,73,98],"group-relevant":[66,95],"object":[67,96],"location":[68,97],"estimation,":[69],"respectively.":[70],"Person":[71],"is":[74,85],"used":[75],"represent":[77],"the":[78,121,137],"motion":[80],"of":[81,108,124,139],"each":[82,140],"person,":[83],"an":[86],"important":[87],"cue":[88],"understanding":[90],"activities.":[92],"In":[93],"contrast,":[94],"encourages":[99],"GAFs":[100],"scene":[103],"context":[104],"(e.g.,":[105],"spatial":[106],"relations":[107],"people":[109],"objects)":[111],"as":[112],"features.":[114],"Comprehensive":[115],"experiments":[116],"on":[117],"public":[118],"datasets":[119],"demonstrate":[120],"state-of-the-art":[122],"performance":[123],"method":[126],"in":[127,142],"retrieval":[130],"recognition.":[132],"Our":[133],"ablation":[134],"studies":[135],"verify":[136],"effectiveness":[138],"component":[141],"method.":[144],"Code:":[145],"https://github.com/tezuka0001/Group-DINOmics.":[146]},"counts_by_year":[],"updated_date":"2026-04-08T06:07:18.267832","created_date":"2026-04-08T00:00:00"}
