{"id":"https://openalex.org/W4401416294","doi":"https://doi.org/10.1109/icra57147.2024.10611588","title":"SlotGNN: Unsupervised Discovery of Multi-Object Representations and Visual Dynamics","display_name":"SlotGNN: Unsupervised Discovery of Multi-Object Representations and Visual Dynamics","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401416294","doi":"https://doi.org/10.1109/icra57147.2024.10611588"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611588","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra57147.2024.10611588","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088480186","display_name":"Alireza Rezazadeh","orcid":"https://orcid.org/0000-0002-2457-9470"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alireza Rezazadeh","raw_affiliation_strings":["University of Minnesota,Department of Electrical and Computer Engineering,Minneapolis,MN,USA,55455"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Minnesota,Department of Electrical and Computer Engineering,Minneapolis,MN,USA,55455","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093047612","display_name":"Athreyi Badithela","orcid":null},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Athreyi Badithela","raw_affiliation_strings":["University of Minnesota,Department of Computer Science and Engineering,Minneapolis,MN,USA,55455"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Minnesota,Department of Computer Science and Engineering,Minneapolis,MN,USA,55455","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052038649","display_name":"Karthik Desingh","orcid":"https://orcid.org/0000-0002-1817-1575"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karthik Desingh","raw_affiliation_strings":["University of Minnesota,Department of Computer Science and Engineering,Minneapolis,MN,USA,55455"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Minnesota,Department of Computer Science and Engineering,Minneapolis,MN,USA,55455","institution_ids":["https://openalex.org/I130238516"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057994317","display_name":"Changhyun Choi","orcid":"https://orcid.org/0000-0003-4715-3576"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changhyun Choi","raw_affiliation_strings":["University of Minnesota,Department of Electrical and Computer Engineering,Minneapolis,MN,USA,55455"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Minnesota,Department of Electrical and Computer Engineering,Minneapolis,MN,USA,55455","institution_ids":["https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I130238516"],"apc_list":null,"apc_paid":null,"fwci":0.4285,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60963615,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"33","issue":null,"first_page":"17508","last_page":"17514"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7530930042266846},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.6431905031204224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5133825540542603},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.46722611784935},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.32521870732307434}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7530930042266846},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.6431905031204224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5133825540542603},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.46722611784935},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32521870732307434},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611588","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra57147.2024.10611588","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320317106","display_name":"Sony","ror":"https://ror.org/04wzv3n59"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1505952289","https://openalex.org/W2028678875","https://openalex.org/W2052021678","https://openalex.org/W2158782408","https://openalex.org/W2528489519","https://openalex.org/W2770604561","https://openalex.org/W2952915411","https://openalex.org/W2955368974","https://openalex.org/W2962785568","https://openalex.org/W2964112890","https://openalex.org/W2967246123","https://openalex.org/W3037784242","https://openalex.org/W3088304681","https://openalex.org/W3131075173","https://openalex.org/W4246233237","https://openalex.org/W4288322145","https://openalex.org/W4385696075","https://openalex.org/W4390874575","https://openalex.org/W6720501231","https://openalex.org/W6729508183","https://openalex.org/W6736685754","https://openalex.org/W6746445604","https://openalex.org/W6748320467","https://openalex.org/W6751350349","https://openalex.org/W6764529735","https://openalex.org/W6765456200","https://openalex.org/W6769281838","https://openalex.org/W6774033856","https://openalex.org/W6779809370","https://openalex.org/W6781776314","https://openalex.org/W6782766965","https://openalex.org/W6800564222","https://openalex.org/W6809898026"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Learning":[0],"multi-object":[1,94,122,187],"dynamics":[2,50,115,215],"from":[3,41,51,136],"visual":[4,170],"data":[5],"using":[6,132],"unsupervised":[7,66,113,194],"techniques":[8],"is":[9,60],"challenging":[10,186],"due":[11],"to":[12,75,100,144],"the":[13,84,91,118,130,133,146,158,177,199],"need":[14],"for":[15,37,46,65],"robust,":[16],"object":[17,39,67],"representations":[18,40],"that":[19,88,166],"can":[20],"be":[21],"learned":[22],"through":[23],"robot":[24,55,154],"interactions.":[25,56],"This":[26,82],"paper":[27],"presents":[28],"a":[29,71,111,126],"novel":[30,112],"framework":[31,208],"with":[32],"two":[33],"new":[34],"architectures:":[35],"SlotTransport":[36,58,137,161],"discovering":[38],"RGB":[42,52],"images":[43,53],"and":[44,54,69,138,141,171,189,212],"SlotGNN":[45,124,180],"predicting":[47],"their":[48,213],"collective":[49],"Our":[57,109,220],"architecture":[59],"based":[61],"on":[62,153],"slot":[63,151],"attention":[64],"discovery":[68,85],"uses":[70],"feature":[72],"transport":[73],"mechanism":[74],"maintain":[76],"temporal":[77],"alignment":[78],"in":[79,162,181,198,216],"object-centric":[80,164],"representations.":[81],"enables":[83],"of":[86,93,121,129,149,160,179],"slots":[87,97,135,211],"consistently":[89],"reflect":[90],"composition":[92],"scenes.":[95,123],"These":[96],"robustly":[98,209],"bind":[99],"distinct":[101],"objects,":[102],"even":[103],"under":[104],"heavy":[105],"occlusion":[106],"or":[107],"absence.":[108],"SlotGNN,":[110],"graph-based":[114],"model,":[116],"predicts":[117,210],"future":[119,147],"state":[120],"learns":[125],"graph":[127],"representation":[128],"scene":[131],"discovered":[134],"performs":[139],"relational":[140],"spatial":[142],"reasoning":[143],"predict":[145],"appearance":[148],"each":[150],"conditioned":[152],"actions.":[155],"We":[156],"demonstrate":[157],"effectiveness":[159],"learning":[163],"features":[165],"accurately":[167],"encode":[168],"both":[169],"positional":[172],"information.":[173],"Further,":[174],"we":[175],"highlight":[176],"accuracy":[178],"downstream":[182],"robotic":[183],"tasks,":[184],"including":[185],"rearrangement":[188],"long-horizon":[190],"prediction.":[191],"Finally,":[192],"our":[193,207],"approach":[195],"proves":[196],"effective":[197],"real":[200],"world.":[201],"With":[202],"only":[203],"minimal":[204],"additional":[205],"data,":[206],"corresponding":[214],"real-world":[217],"control":[218],"tasks.":[219],"project":[221],"webpage:":[222],"bit.ly/slotgnn.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
