{"id":"https://openalex.org/W4416252215","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227165","title":"ORSA-T: Multi-View Object-Centric Scene Representation Learning with Slot Attention and Transformer","display_name":"ORSA-T: Multi-View Object-Centric Scene Representation Learning with Slot Attention and Transformer","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252215","doi":"https://doi.org/10.1109/ijcnn64981.2025.11227165"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11227165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120450599","display_name":"Henri Placek","orcid":null},"institutions":[{"id":"https://openalex.org/I165862685","display_name":"St George's, University of London","ror":"https://ror.org/040f08y74","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I165862685"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Henri Placek","raw_affiliation_strings":["University of London,Department of Computer Science City St George&#x2019;s,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of London,Department of Computer Science City St George&#x2019;s,United Kingdom","institution_ids":["https://openalex.org/I165862685"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064657398","display_name":"Chris Child","orcid":"https://orcid.org/0000-0001-5425-2308"},"institutions":[{"id":"https://openalex.org/I165862685","display_name":"St George's, University of London","ror":"https://ror.org/040f08y74","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I165862685"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chris Child","raw_affiliation_strings":["University of London,Department of Computer Science City St George&#x2019;s,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of London,Department of Computer Science City St George&#x2019;s,United Kingdom","institution_ids":["https://openalex.org/I165862685"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018493892","display_name":"Tillman Weyde","orcid":"https://orcid.org/0000-0001-8028-9905"},"institutions":[{"id":"https://openalex.org/I165862685","display_name":"St George's, University of London","ror":"https://ror.org/040f08y74","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I165862685"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tillman Weyde","raw_affiliation_strings":["University of London,Department of Computer Science City St George&#x2019;s,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of London,Department of Computer Science City St George&#x2019;s,United Kingdom","institution_ids":["https://openalex.org/I165862685"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120450599"],"corresponding_institution_ids":["https://openalex.org/I165862685"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1951746,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.3431999981403351,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.3431999981403351,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.19699999690055847,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07519999891519547,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6974999904632568},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6244999766349792},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5591999888420105},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5493000149726868},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.44920000433921814},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.43650001287460327},{"id":"https://openalex.org/keywords/scene-statistics","display_name":"Scene statistics","score":0.43639999628067017}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7328000068664551},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6974999904632568},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6798999905586243},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6244999766349792},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5591999888420105},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5493000149726868},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4823000133037567},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.43650001287460327},{"id":"https://openalex.org/C197654239","wikidata":"https://www.wikidata.org/wiki/Q7430757","display_name":"Scene statistics","level":3,"score":0.43639999628067017},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.38179999589920044},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3176000118255615},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.2854999899864197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26030001044273376}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11227165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11227165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:openaccess.city.ac.uk:35841","is_oa":false,"landing_page_url":"https://openaccess.city.ac.uk/view/creators_id/henri=2Eplacek.html>,","pdf_url":null,"source":{"id":"https://openalex.org/S4306401940","display_name":"City Research Online (City University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I180825142","host_organization_name":"City, University of London","host_organization_lineage":["https://openalex.org/I180825142"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1928278792","https://openalex.org/W2127859399","https://openalex.org/W2149524323","https://openalex.org/W2154396137","https://openalex.org/W2158782408","https://openalex.org/W2222512263","https://openalex.org/W2561715562","https://openalex.org/W2808492412","https://openalex.org/W2903585023","https://openalex.org/W3109585842","https://openalex.org/W4200632985","https://openalex.org/W4380635116","https://openalex.org/W4385245566"],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"a":[1,95,105,150],"scene":[2,23,75,100,124,173],"from":[3,38,149],"multiple,":[4],"potentially":[5],"partial":[6,29,139],"views":[7,30,33,93],"and":[8,18,45,81,97,118,132,146,159,164],"decomposing":[9],"it":[10,175],"into":[11],"objects":[12,130],"is":[13,46,176],"foundational":[14],"for":[15,178],"human":[16],"perception":[17],"intelligence.":[19],"Current":[20],"multi-view":[21],"object-centric":[22],"representation":[24,101],"learning":[25,77],"models":[26],"that":[27],"use":[28,179],"analyze":[31],"all":[32,115],"at":[34],"once.":[35],"This":[36],"differs":[37],"the":[39,66,99,119,156],"way":[40],"humans":[41],"process":[42],"visual":[43],"information":[44],"not":[47],"compatible":[48],"with":[49,78,88,110],"reinforcement":[50,181],"learning,":[51],"where":[52],"an":[53,89],"agent":[54],"learns":[55,168],"about":[56],"its":[57,172],"environment":[58],"through":[59],"actions,":[60],"such":[61],"as":[62],"moving":[63],"to":[64,122,138,170],"change":[65],"viewpoint.":[67],"In":[68,141],"this":[69],"paper,":[70],"we":[71],"propose":[72],"ORSA-T":[73,128,144,167],"(Object-centric":[74],"Representation":[76],"Slot":[79,86],"Attention":[80,87],"Transformer),":[82],"which":[83,126],"combines":[84],"Implicit":[85],"aggregation":[90,162],"of":[91,107],"previous":[92,116],"by":[94],"Transformer":[96,113],"improves":[98],"iteratively":[102,169],"based":[103],"on":[104],"sequence":[106],"images":[108,148],"annotated":[109],"viewpoints.":[111],"The":[112],"uses":[114],"representations":[117],"current":[120,157],"update":[121],"aggregate":[123],"information,":[125],"makes":[127],"remember":[129],"better":[131,153],"learn":[133],"more":[134],"effectively":[135],"when":[136],"applied":[137],"views.":[140],"our":[142],"experiments,":[143],"predicts":[145],"segments":[147],"new":[151],"viewpoint":[152],"than":[154],"MulMON,":[155],"SOTA,":[158],"ORSA":[160],"without":[161],"connections":[163],"Transformer.":[165],"As":[166],"improve":[171],"representation,":[174],"suitable":[177],"in":[180],"learning.":[182]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
