{"id":"https://openalex.org/W7138084414","doi":"https://doi.org/10.1609/aaai.v40i16.38322","title":"Predicting Video Slot Attention Queries from Random Slot-Feature Pairs","display_name":"Predicting Video Slot Attention Queries from Random Slot-Feature Pairs","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138084414","doi":"https://doi.org/10.1609/aaai.v40i16.38322"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i16.38322","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i16.38322","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i16.38322","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102685471","display_name":"Rongzhen Zhao","orcid":"https://orcid.org/0009-0000-3964-7336"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rongzhen Zhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129710882","display_name":"Jian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129743378","display_name":"Juho Kannala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juho Kannala","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129705105","display_name":"Joni Pajarinen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joni Pajarinen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102685471"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38851603,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"16","first_page":"13208","last_page":"13216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3508000075817108,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3508000075817108,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.2076999992132187,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.047600001096725464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.649399995803833},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5874000191688538},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5724999904632568},{"id":"https://openalex.org/keywords/news-aggregator","display_name":"News aggregator","score":0.515999972820282},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.4223000109195709},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.41909998655319214},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.40700000524520874},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.39149999618530273}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8043000102043152},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.649399995803833},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5874000191688538},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5724999904632568},{"id":"https://openalex.org/C180505990","wikidata":"https://www.wikidata.org/wiki/Q498267","display_name":"News aggregator","level":2,"score":0.515999972820282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4691999852657318},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.41909998655319214},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.39149999618530273},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.3889999985694885},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C172722865","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial query","level":5,"score":0.34929999709129333},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.34599998593330383},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.32409998774528503},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.31709998846054077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31310001015663147},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2741999924182892},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.26840001344680786},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26759999990463257},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.26460000872612}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i16.38322","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i16.38322","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i16.38322","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i16.38322","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Unsupervised":[0],"video":[1,20,31,155],"Object-Centric":[2],"Learning":[3],"(OCL)":[4],"is":[5,53],"promising":[6],"as":[7,15],"it":[8,140],"enables":[9],"object-level":[10],"scene":[11,147,177],"representation":[12,148],"and":[13,76,113],"understanding":[14],"we":[16,93],"humans":[17],"do.":[18],"Mainstream":[19],"OCL":[21,156],"methods":[22,157],"adopt":[23],"a":[24,106],"recurrent":[25],"architecture:":[26],"An":[27],"aggregator":[28],"aggregates":[29],"current":[30,44],"frame":[32,67],"into":[33],"object":[34,165],"features,":[35,68,114],"termed":[36],"slots,":[37],"under":[38],"some":[39],"queries;":[40],"A":[41],"transitioner":[42,108,126],"transits":[43],"slots":[45,112],"to":[46,64,79,109,127,141,161],"queries":[47,129],"for":[48,73,86,98,119],"the":[49,69,83,125],"next":[50,66],"frame.":[51],"This":[52],"an":[54],"effective":[55],"architecture":[56],"but":[57],"all":[58],"existing":[59,154],"implementations":[60],"both":[61,111],"(i1)":[62],"neglect":[63],"incorporate":[65,110],"most":[70],"informative":[71],"source":[72],"query":[74,87,120],"prediction,":[75],"(i2)":[77],"fail":[78],"learn":[80,142],"transition":[81,143],"dynamics,":[82],"knowledge":[84],"essential":[85],"prediction.":[88],"To":[89],"address":[90],"these":[91],"issues,":[92],"propose":[94],"Random":[95],"Slot-Feature":[96],"pair":[97],"learning":[99],"Query":[100],"prediction":[101],"(RandSF.Q):":[102],"(t1)":[103],"We":[104,123],"design":[105],"new":[107,168],"which":[115,138],"provides":[116],"more":[117],"information":[118],"prediction;":[121],"(t2)":[122],"train":[124],"predict":[128],"from":[130,135],"slot-feature":[131],"pairs":[132],"randomly":[133],"sampled":[134],"available":[136],"recurrences,":[137],"drives":[139],"dynamics.":[144],"Experiments":[145],"on":[146,164],"demonstrate":[149],"that":[150],"our":[151],"method":[152],"surpass":[153],"significantly,":[158],"e.g.,":[159],"up":[160],"10":[162],"points":[163],"discovery,":[166],"setting":[167],"state-of-the-art.":[169],"Such":[170],"superiority":[171],"also":[172],"benefits":[173],"downstream":[174],"tasks":[175],"like":[176],"understanding.":[178]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
