{"id":"https://openalex.org/W4403579038","doi":"https://doi.org/10.48550/arxiv.2410.12995","title":"Configurable Embodied Data Generation for Class-Agnostic RGB-D Video Segmentation","display_name":"Configurable Embodied Data Generation for Class-Agnostic RGB-D Video Segmentation","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403579038","doi":"https://doi.org/10.48550/arxiv.2410.12995"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.12995","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12995","pdf_url":"https://arxiv.org/pdf/2410.12995","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.12995","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006804543","display_name":"Anthony W. Opipari","orcid":"https://orcid.org/0000-0002-4093-302X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Opipari, Anthony","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035133232","display_name":"Aravindhan K Krishnan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishnan, Aravindhan K","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053446578","display_name":"Shreekant Gayaka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gayaka, Shreekant","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101561819","display_name":"Min Sun","orcid":"https://orcid.org/0000-0002-5624-6988"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105792706","display_name":"Cheng-Hao Kuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuo, Cheng-Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050826088","display_name":"Arnie Sen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sen, Arnie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5071106238","display_name":"Odest Chadwicke Jenkins","orcid":"https://orcid.org/0000-0003-3750-7334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jenkins, Odest Chadwicke","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5006804543"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.710099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.710099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6370000243186951,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.5914000272750854,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6962506771087646},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.686485767364502},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.655154824256897},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.571327805519104},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5100422501564026},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5093961358070374},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.503208577632904},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3882673382759094}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6962506771087646},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.686485767364502},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.655154824256897},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.571327805519104},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5100422501564026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5093961358070374},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.503208577632904},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3882673382759094}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.12995","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12995","pdf_url":"https://arxiv.org/pdf/2410.12995","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.12995","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.12995","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.12995","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12995","pdf_url":"https://arxiv.org/pdf/2410.12995","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4403579038.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2380179524","https://openalex.org/W4283365723","https://openalex.org/W2963001125","https://openalex.org/W2091233881","https://openalex.org/W2352366064","https://openalex.org/W4250820896","https://openalex.org/W2124102101","https://openalex.org/W4250305970","https://openalex.org/W1484550171","https://openalex.org/W1570928019"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,57,77],"method":[4],"for":[5,39,61,99,125],"generating":[6],"large-scale":[7],"datasets":[8],"to":[9,68,111,129,136,160],"improve":[10],"class-agnostic":[11],"video":[12,27,92,105,116,163],"segmentation":[13,28,33,94,106,164],"across":[14],"robots":[15],"with":[16,102],"different":[17],"form":[18],"factors.":[19],"Specifically,":[20],"we":[21],"consider":[22],"the":[23,49],"question":[24],"of":[25],"whether":[26],"models":[29,135],"trained":[30],"on":[31,76],"generic":[32],"data":[34,50],"could":[35],"be":[36],"more":[37],"effective":[38],"particular":[40],"robot":[41,44,138],"platforms":[42],"if":[43],"embodiment":[45,79],"is":[46,59,97,171],"factored":[47],"into":[48],"generation":[51],"process.":[52],"To":[53],"answer":[54],"this":[55],"question,":[56],"pipeline":[58],"formulated":[60],"using":[62,123,150],"3D":[63,151],"reconstructions":[64],"(e.g.":[65,80],"from":[66],"HM3DSem)":[67],"generate":[69],"segmented":[70],"videos":[71],"that":[72,122,149],"are":[73],"configurable":[74],"based":[75],"robot's":[78],"sensor":[81,83],"type,":[82],"placement,":[84],"and":[85,104,155,166],"illumination":[86],"source).":[87],"A":[88],"resulting":[89],"massive":[90],"RGB-D":[91],"panoptic":[93],"dataset":[95],"(MVPd)":[96],"introduced":[98],"extensive":[100],"benchmarking":[101],"foundation":[103,134],"models,":[107],"as":[108,110,141],"well":[109],"support":[112],"embodiment-focused":[113],"research":[114],"in":[115,162],"segmentation.":[117],"Our":[118],"experimental":[119],"findings":[120],"demonstrate":[121],"MVPd":[124],"finetuning":[126],"can":[127,158],"lead":[128,159],"performance":[130],"improvements":[131,161],"when":[132],"transferring":[133],"certain":[137],"embodiments,":[139],"such":[140],"specific":[142],"camera":[143,156],"placements.":[144],"These":[145],"experiments":[146],"also":[147],"show":[148],"modalities":[152],"(depth":[153],"images":[154],"pose)":[157],"accuracy":[165],"consistency.":[167],"The":[168],"project":[169],"webpage":[170],"available":[172],"at":[173],"https://topipari.com/projects/MVPd":[174]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2024-10-20T00:00:00"}
