{"id":"https://openalex.org/W7126057145","doi":"https://doi.org/10.48550/arxiv.2601.20381","title":"STORM: Slot-based Task-aware Object-centric Representation for robotic Manipulation","display_name":"STORM: Slot-based Task-aware Object-centric Representation for robotic Manipulation","publication_year":2026,"publication_date":"2026-01-28","ids":{"openalex":"https://openalex.org/W7126057145","doi":"https://doi.org/10.48550/arxiv.2601.20381"},"language":"en","primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.20381","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006353636","display_name":"Alexandre Chapin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chapin, Alexandre","raw_affiliation_strings":["LIRIS"],"affiliations":[{"raw_affiliation_string":"LIRIS","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066469792","display_name":"Emmanuel Dellandr\u00e9a","orcid":"https://orcid.org/0000-0001-7346-228X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dellandr\u00e9a, Emmanuel","raw_affiliation_strings":["LIRIS"],"affiliations":[{"raw_affiliation_string":"LIRIS","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100443536","display_name":"Liming Chen","orcid":"https://orcid.org/0000-0003-0200-7989"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Liming","raw_affiliation_strings":["LIRIS"],"affiliations":[{"raw_affiliation_string":"LIRIS","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006353636"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8698999881744385,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8698999881744385,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.041600000113248825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.012799999676644802,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5633999705314636},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5127999782562256},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3828999996185303},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.3702000081539154},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3659999966621399},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3569999933242798},{"id":"https://openalex.org/keywords/teleoperation","display_name":"Teleoperation","score":0.34439998865127563},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.33809998631477356},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.32839998602867126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70169997215271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6128000020980835},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5633999705314636},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5127999782562256},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3828999996185303},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C161759796","wikidata":"https://www.wikidata.org/wiki/Q3982902","display_name":"Teleoperation","level":3,"score":0.34439998865127563},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.33809998631477356},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3264000117778778},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.303600013256073},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30140000581741333},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.25369998812675476},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.25110000371932983}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.20381","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:HAL:hal-05477440v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05477440","pdf_url":null,"source":{"id":"https://openalex.org/S4406922454","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2026","raw_type":"Preprints, Working Papers, ..."},{"id":"doi:10.48550/arxiv.2601.20381","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.20381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.20381","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"foundation":[1,43,127,147],"models":[2,44],"provide":[3],"strong":[4],"perceptual":[5],"features":[6,129,149],"for":[7,31,52,144,154],"robotics,":[8],"but":[9],"their":[10],"dense":[11],"representations":[12,133,153],"lack":[13],"explicit":[14],"object-level":[15],"structure,":[16],"limiting":[17],"robustness":[18],"and":[19,92,107,119],"contractility":[20],"in":[21],"manipulation":[22,83,109],"tasks.":[23],"We":[24],"propose":[25],"STORM":[26,60,113],"(Slot-based":[27],"Task-aware":[28],"Object-centric":[29],"Representation":[30],"robotic":[32,53,155],"Manipulation),":[33],"a":[34,46,62,81],"lightweight":[35],"object-centric":[36,66,132,152],"adaptation":[37,139],"module":[38],"that":[39,112],"augments":[40],"frozen":[41,126],"visual":[42,117],"with":[45,80,99],"small":[47],"set":[48],"of":[49],"semantic-aware":[50],"slots":[51,67],"manipulation.":[54],"Rather":[55],"than":[56],"retraining":[57],"large":[58],"backbones,":[59],"employs":[61],"multi-phase":[63,138],"training":[64,131],"strategy:":[65],"are":[68],"first":[69],"stabilized":[70],"through":[71],"visual--semantic":[72],"pretraining":[73],"using":[74,125],"language":[75],"embeddings,":[76],"then":[77],"jointly":[78],"adapted":[79],"downstream":[82],"policy.":[84],"This":[85],"staged":[86],"learning":[87],"prevents":[88],"degenerate":[89],"slot":[90],"formation":[91],"preserves":[93],"semantic":[94],"consistency":[95],"while":[96],"aligning":[97],"perception":[98],"task":[100],"objectives.":[101],"Experiments":[102],"on":[103],"object":[104],"discovery":[105],"benchmarks":[106],"simulated":[108],"tasks":[110],"show":[111],"improves":[114],"generalization":[115],"to":[116,123],"distractors,":[118],"control":[120],"performance":[121],"compared":[122],"directly":[124],"model":[128,148],"or":[130],"end-to-end.":[134],"Our":[135],"results":[136],"highlight":[137],"as":[140],"an":[141],"efficient":[142],"mechanism":[143],"transforming":[145],"generic":[146],"into":[150],"task-aware":[151],"control.":[156]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-01-30T00:00:00"}
