{"id":"https://openalex.org/W7154303864","doi":"https://doi.org/10.48550/arxiv.2604.10951","title":"Fast-SegSim: Real-Time Open-Vocabulary Segmentation for Robotics in Simulation","display_name":"Fast-SegSim: Real-Time Open-Vocabulary Segmentation for Robotics in Simulation","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154303864","doi":"https://doi.org/10.48550/arxiv.2604.10951"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10951","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10951","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10951","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133598386","display_name":"Xuan Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Xuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133576391","display_name":"Yuxuan Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017271826","display_name":"Shichao Zhai","orcid":"https://orcid.org/0000-0001-6910-3655"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhai, Shichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5096085092","display_name":"Shuhao Ye","orcid":"https://orcid.org/0009-0002-4438-3114"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Shuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133615003","display_name":"Rong Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Rong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101556395","display_name":"Yue Wang","orcid":"https://orcid.org/0000-0002-4401-8929"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.3314000070095062,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.3314000070095062,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.10939999669790268,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.10480000078678131,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.579200029373169},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5496000051498413},{"id":"https://openalex.org/keywords/graphics-pipeline","display_name":"Graphics pipeline","score":0.5367000102996826},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.525600016117096},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4528000056743622},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3817000091075897},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.37380000948905945},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.35839998722076416},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.34310001134872437}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7972999811172485},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7483000159263611},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6152999997138977},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.579200029373169},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5496000051498413},{"id":"https://openalex.org/C173552908","wikidata":"https://www.wikidata.org/wiki/Q1366289","display_name":"Graphics pipeline","level":4,"score":0.5367000102996826},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.525600016117096},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4528000056743622},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3817000091075897},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.37380000948905945},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.34310001134872437},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.3068999946117401},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2854999899864197},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.25519999861717224},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10951","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10951","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10951","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10951","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6000192165374756,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Open-vocabulary":[0],"panoptic":[1],"reconstruction":[2,13],"is":[3,77],"crucial":[4],"for":[5,46,155,170],"advanced":[6],"robotics":[7],"and":[8,56,69,106,130,160,203],"simulation.":[9],"However,":[10],"existing":[11],"3D":[12],"methods,":[14],"such":[15],"as":[16,150],"NeRF":[17],"or":[18],"Gaussian":[19,62,123],"Splatting":[20],"variants,":[21],"often":[22],"struggle":[23],"to":[24,65,102,125,184,209],"achieve":[25],"the":[26,42,86,116,121,181,186,195,211],"real-time":[27],"inference":[28],"frequency":[29],"required":[30,45],"by":[31,179],"robotic":[32,145],"control":[33],"loops.":[34],"Existing":[35],"methods":[36],"incur":[37],"prohibitive":[38],"latency":[39],"when":[40],"processing":[41],"high-dimensional":[43],"features":[44],"robust":[47],"open-vocabulary":[48,71],"segmentation.":[49],"We":[50,94,175],"propose":[51],"Fast-SegSim,":[52],"a":[53,78,107,151],"novel,":[54],"simple,":[55],"end-to-end":[57],"framework":[58],"built":[59],"upon":[60],"2D":[61,122],"Splatting,":[63],"designed":[64],"realize":[66],"real-time,":[67],"high-fidelity,":[68],"3D-consistent":[70,162],"segmentation":[72,91],"reconstruction.":[73],"Our":[74,199],"core":[75],"contribution":[76],"highly":[79],"optimized":[80],"rendering":[81,201],"pipeline":[82],"that":[83],"specifically":[84],"addresses":[85],"computational":[87],"bottleneck":[88],"of":[89],"high-channel":[90],"feature":[92,128],"accumulation.":[93],"introduce":[95],"two":[96],"key":[97],"optimizations:":[98],"Precise":[99],"Tile":[100],"Intersection":[101],"reduce":[103],"rasterization":[104],"redundancy,":[105],"novel":[108],"Top-K":[109],"Hard":[110],"Selection":[111],"strategy.":[112],"This":[113],"strategy":[114],"leverages":[115],"geometric":[117],"sparsity":[118],"inherent":[119],"in":[120,144,189],"representation":[124],"greatly":[126],"simplify":[127],"accumulation":[129],"alleviate":[131],"bandwidth":[132],"limitations,":[133],"achieving":[134],"render":[135],"rates":[136],"exceeding":[137],"40":[138],"FPS.":[139],"Fast-SegSim":[140],"provides":[141],"critical":[142],"value":[143],"applications:":[146],"it":[147],"serves":[148],"both":[149],"high-frequency":[152],"sensor":[153],"input":[154],"simulation":[156],"platforms":[157],"like":[158],"Gazebo,":[159],"its":[161],"outputs":[163],"provide":[164],"essential":[165],"multi-view":[166],"'ground":[167],"truth'":[168],"labels":[169,183],"fine-tuning":[171],"downstream":[172],"perception":[173,187],"tasks.":[174],"demonstrate":[176],"this":[177],"utility":[178,205],"using":[180],"generated":[182],"fine-tune":[185],"module":[188],"object":[190],"goal":[191],"navigation,":[192],"successfully":[193],"doubling":[194],"navigation":[196],"success":[197],"rate.":[198],"superior":[200],"speed":[202],"practical":[204],"underscore":[206],"Fast-SegSim's":[207],"potential":[208],"bridge":[210],"sim-to-real":[212],"gap.":[213]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
