{"id":"https://openalex.org/W7134818291","doi":"https://doi.org/10.48550/arxiv.2603.08317","title":"Human-AI Divergence in Ego-centric Action Recognition under Spatial and Spatiotemporal Manipulations","display_name":"Human-AI Divergence in Ego-centric Action Recognition under Spatial and Spatiotemporal Manipulations","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134818291","doi":"https://doi.org/10.48550/arxiv.2603.08317"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08317","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114368004","display_name":"Sadegh Rahmaniboldaji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahmaniboldaji, Sadegh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114368005","display_name":"Filip Rybansky","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rybansky, Filip","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003127881","display_name":"Quoc C. Vuong","orcid":"https://orcid.org/0000-0002-5983-964X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vuong, Quoc C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090512379","display_name":"Anya Hurlbert","orcid":"https://orcid.org/0000-0002-9879-5758"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hurlbert, Anya C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090217029","display_name":"Frank Gu\u00e9rin","orcid":"https://orcid.org/0000-0003-1918-6311"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guerin, Frank","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128672903","display_name":"Andrew Gilbert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gilbert, Andrew","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11431","display_name":"Action Observation and Synchronization","score":0.003100000089034438,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.5666999816894531},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5292999744415283},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4999000132083893},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.4699000120162964},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4020000100135803},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.3856000006198883},{"id":"https://openalex.org/keywords/scrambling","display_name":"Scrambling","score":0.33390000462532043},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.3280999958515167}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6182000041007996},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5920000076293945},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.5666999816894531},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5292999744415283},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4999000132083893},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.4699000120162964},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4020000100135803},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C182548165","wikidata":"https://www.wikidata.org/wiki/Q2261483","display_name":"Scrambling","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3041999936103821},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C94361409","wikidata":"https://www.wikidata.org/wiki/Q7882500","display_name":"Uncertainty reduction theory","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C158709400","wikidata":"https://www.wikidata.org/wiki/Q3578586","display_name":"Spatial ecology","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C2777489503","wikidata":"https://www.wikidata.org/wiki/Q7698936","display_name":"Temporal scales","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08317","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08317","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08317","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08317","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Humans":[0],"consistently":[1],"outperform":[2],"state-of-the-art":[3],"AI":[4],"models":[5],"in":[6,10],"action":[7,49],"recognition,":[8],"particularly":[9],"challenging":[11],"real-world":[12],"conditions":[13],"involving":[14],"low":[15],"resolution,":[16],"occlusion,":[17],"and":[18,34,81,96,108,120,130,134,147,186,191],"visual":[19,132],"clutter.":[20],"Understanding":[21],"the":[22,59,109,181,217],"sources":[23],"of":[24,47,126,140],"this":[25,38],"performance":[26,100,156],"gap":[27],"is":[28,101],"essential":[29],"for":[30,66],"developing":[31],"more":[32,184],"robust":[33,207],"human-aligned":[35],"models.":[36],"In":[37,179],"paper,":[39],"we":[40],"present":[41],"a":[42,77,138,167],"large-scale":[43],"human-AI":[44],"comparative":[45],"study":[46],"egocentric":[48],"recognition":[50],"using":[51,103],"Minimal":[52],"Identifiable":[53],"Recognition":[54,99,121],"Crops":[55],"(MIRCs),":[56],"defined":[57],"as":[58,176],"smallest":[60],"spatial":[61,93,127,202,212],"or":[62],"spatiotemporal":[63,135],"regions":[64],"sufficient":[65],"reliable":[67],"human":[68,106,155],"recognition.":[69],"We":[70],"used":[71],"our":[72],"previously":[73],"introduced,":[74],"Epic":[75],"ReduAct,":[76],"systematically":[78],"spatially":[79],"reduced":[80],"temporally":[82],"scrambled":[83],"dataset":[84],"derived":[85],"from":[86,162],"36":[87],"EPIC":[88],"KITCHENS":[89],"videos,":[90],"spanning":[91],"multiple":[92],"reduction":[94],"levels":[95],"temporal":[97,223,227],"conditions.":[98],"evaluated":[102],"over":[104],"3,000":[105],"participants":[107],"Side4Video":[110],"model.":[111],"Our":[112],"analysis":[113],"combines":[114],"quantitative":[115],"metrics,":[116],"Average":[117],"Reduction":[118],"Rate":[119],"Gap,":[122],"with":[123],"qualitative":[124],"analyses":[125],"(high-,":[128],"mid-,":[129],"low-level":[131,194],"features)":[133],"factors,":[136],"including":[137],"categorisation":[139],"actions":[141],"into":[142],"Low":[143],"Temporal":[144,149],"Actions":[145,150],"(LTA)":[146],"High":[148],"(HTA).":[151],"Results":[152],"show":[153],"that":[154],"exhibits":[157],"sharp":[158],"declines":[159],"when":[160,210],"transitioning":[161],"MIRCs":[163],"to":[164,193,208,222],"subMIRCs,":[165],"reflecting":[166],"strong":[168],"reliance":[169],"on":[170,189],"sparse,":[171],"semantically":[172],"critical":[173],"cues":[174,213],"such":[175],"hand-object":[177],"interactions.":[178],"contrast,":[180],"model":[182,218],"degrades":[183],"gradually":[185],"often":[187,219],"relies":[188],"contextual":[190],"mid-":[192],"features,":[195],"sometimes":[196],"even":[197],"exhibiting":[198],"increased":[199],"confidence":[200],"under":[201],"reduction.":[203],"Temporally,":[204],"humans":[205],"remain":[206],"scrambling":[209],"key":[211],"are":[214],"preserved,":[215],"whereas":[216],"shows":[220],"insensitivity":[221],"disruption,":[224],"revealing":[225],"class-dependent":[226],"sensitivities.":[228]},"counts_by_year":[],"updated_date":"2026-07-01T08:55:40.977307","created_date":"2026-03-11T00:00:00"}
