{"id":"https://openalex.org/W4409150028","doi":"https://doi.org/10.1145/3690624.3709168","title":"Reasoning-Enhanced Object-Centric Learning for Videos","display_name":"Reasoning-Enhanced Object-Centric Learning for Videos","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409150028","doi":"https://doi.org/10.1145/3690624.3709168"},"language":"en","primary_location":{"id":"doi:10.1145/3690624.3709168","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709168","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100773861","display_name":"Jian Li","orcid":"https://orcid.org/0000-0002-0685-0861"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jian Li","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035880261","display_name":"Pu Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pu Ren","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100356057","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0003-0127-4030"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089604961","display_name":"Hao Sun","orcid":"https://orcid.org/0000-0002-5145-3259"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Sun","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100773861"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05205633,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"659","last_page":"670"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7616931200027466},{"id":"https://openalex.org/keywords/learning-object","display_name":"Learning object","score":0.5242809653282166},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5240621566772461},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44727474451065063},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3794666528701782}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7616931200027466},{"id":"https://openalex.org/C2779542340","wikidata":"https://www.wikidata.org/wiki/Q1062461","display_name":"Learning object","level":2,"score":0.5242809653282166},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5240621566772461},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44727474451065063},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3794666528701782}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3690624.3709168","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3690624.3709168","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2033403400","https://openalex.org/W2133665775","https://openalex.org/W2177899970","https://openalex.org/W2599674900","https://openalex.org/W2962785568","https://openalex.org/W2977741895","https://openalex.org/W2998432370","https://openalex.org/W2999794487","https://openalex.org/W3015966228","https://openalex.org/W3021538729","https://openalex.org/W3022945091","https://openalex.org/W3033991488","https://openalex.org/W3035470414","https://openalex.org/W3037338873","https://openalex.org/W3037784242","https://openalex.org/W3122866338","https://openalex.org/W3126721948","https://openalex.org/W3128196514","https://openalex.org/W3128267727","https://openalex.org/W3156909481","https://openalex.org/W3164797320","https://openalex.org/W3167945274","https://openalex.org/W3202454753","https://openalex.org/W3203092180","https://openalex.org/W3210711965","https://openalex.org/W3211393675","https://openalex.org/W3217765097","https://openalex.org/W4206071945","https://openalex.org/W4226333682","https://openalex.org/W4235169531","https://openalex.org/W4283074245","https://openalex.org/W4284677303","https://openalex.org/W4287123477","https://openalex.org/W4312339971","https://openalex.org/W4318963850","https://openalex.org/W4320458262","https://openalex.org/W4386071988","https://openalex.org/W6779809370"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Object-centric":[0],"learning":[1,22,153],"aims":[2],"to":[3,76,98],"break":[4],"down":[5],"complex":[6,105],"visual":[7],"scenes":[8],"into":[9],"more":[10],"manageable":[11],"object":[12,67],"representations,":[13],"enhancing":[14],"the":[15,25,44,47,52,90,100,120,141,147,162],"understanding":[16],"and":[17,38,56,66,131,171,182],"reasoning":[18,49,55,87],"abilities":[19,58,72],"of":[20,46,149],"machine":[21],"systems":[23],"toward":[24],"physical":[26],"world.":[27],"Recently,":[28],"slot-based":[29,127],"video":[30],"models":[31,154],"have":[32],"demonstrated":[33],"remarkable":[34],"proficiency":[35],"in":[36,63,69,104,168],"segmenting":[37],"tracking":[39],"objects,":[40],"but":[41],"they":[42],"overlook":[43],"importance":[45],"effective":[48],"module.":[50],"In":[51],"real":[53],"world,":[54],"predictive":[57,160],"play":[59],"a":[60,85,159],"crucial":[61],"role":[62],"human":[64,77],"perception":[65,102],"tracking;":[68],"particular,":[70],"these":[71],"are":[73],"closely":[74],"related":[75],"intuitive":[78],"physics.":[79],"Inspired":[80],"by":[81],"this,":[82],"we":[83],"designed":[84],"novel":[86],"module":[88,143,164],"called":[89],"Slot-based":[91,121],"Time-Space":[92,122],"Transformer":[93,123],"with":[94],"Memory":[95],"buffer":[96,109],"(STATM)":[97],"enhance":[99,146],"model's":[101],"ability":[103],"scenes.":[106],"The":[107],"memory":[108],"primarily":[110],"serves":[111],"as":[112,158],"storage":[113],"for":[114,155],"slot":[115],"information":[116],"from":[117],"upstream":[118],"modules,":[119],"makes":[124],"predictions":[125],"through":[126],"spatiotemporal":[128],"attention":[129],"computations":[130],"fusion.":[132],"Our":[133],"experimental":[134],"results":[135],"on":[136],"various":[137],"datasets":[138],"indicate":[139],"that":[140],"STATM":[142,163],"can":[144],"significantly":[145],"capabilities":[148],"multiple":[150],"state-of-the-art":[151],"object-centric":[152],"video.":[156],"Moreover,":[157],"model,":[161],"also":[165],"performs":[166],"well":[167],"downstream":[169],"prediction":[170],"Visual":[172],"Question":[173],"Answering":[174],"(VQA)":[175],"tasks.":[176],"We":[177],"will":[178],"release":[179],"our":[180],"codes":[181],"data":[183],"at":[184],"https://github.com/intell-sci-comput/STATM.":[185]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
