{"id":"https://openalex.org/W4391987783","doi":"https://doi.org/10.48550/arxiv.2402.11872","title":"Real-time 3D Semantic Scene Perception for Egocentric Robots with Binocular Vision","display_name":"Real-time 3D Semantic Scene Perception for Egocentric Robots with Binocular Vision","publication_year":2024,"publication_date":"2024-02-19","ids":{"openalex":"https://openalex.org/W4391987783","doi":"https://doi.org/10.48550/arxiv.2402.11872"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2402.11872","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.11872","pdf_url":"https://arxiv.org/pdf/2402.11872","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.11872","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101809540","display_name":"Khang Nguyen","orcid":"https://orcid.org/0000-0003-3471-5533"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nguyen, K.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002407078","display_name":"Trung Thanh Dang","orcid":"https://orcid.org/0000-0003-4782-2397"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dang, T.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047174917","display_name":"Manfred Huber","orcid":"https://orcid.org/0009-0007-0294-9147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huber, M.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101809540"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13579","display_name":"Image and Video Stabilization","score":0.9276999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13579","display_name":"Image and Video Stabilization","score":0.9276999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14270","display_name":"Simulation and Modeling Applications","score":0.9253000020980835,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.674609899520874},{"id":"https://openalex.org/keywords/binocular-disparity","display_name":"Binocular disparity","score":0.6589654684066772},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6175335645675659},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6013343334197998},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5041714906692505},{"id":"https://openalex.org/keywords/binocular-vision","display_name":"Binocular vision","score":0.501507043838501},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4329531192779541},{"id":"https://openalex.org/keywords/depth-perception","display_name":"Depth perception","score":0.4110822379589081},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.35819149017333984},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.05335685610771179}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.674609899520874},{"id":"https://openalex.org/C90790637","wikidata":"https://www.wikidata.org/wiki/Q11681118","display_name":"Binocular disparity","level":3,"score":0.6589654684066772},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6175335645675659},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6013343334197998},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5041714906692505},{"id":"https://openalex.org/C121958486","wikidata":"https://www.wikidata.org/wiki/Q609543","display_name":"Binocular vision","level":2,"score":0.501507043838501},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4329531192779541},{"id":"https://openalex.org/C52672216","wikidata":"https://www.wikidata.org/wiki/Q1749840","display_name":"Depth perception","level":3,"score":0.4110822379589081},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.35819149017333984},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.05335685610771179}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2402.11872","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.11872","pdf_url":"https://arxiv.org/pdf/2402.11872","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2402.11872","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2402.11872","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2402.11872","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.11872","pdf_url":"https://arxiv.org/pdf/2402.11872","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322015","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391987783.pdf","grobid_xml":"https://content.openalex.org/works/W4391987783.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2013778432","https://openalex.org/W2066996170","https://openalex.org/W2509650856","https://openalex.org/W2121745554","https://openalex.org/W2072783906","https://openalex.org/W2006858907","https://openalex.org/W2168424431","https://openalex.org/W2026531732","https://openalex.org/W2004704930","https://openalex.org/W2970155046"],"abstract_inverted_index":{"Perceiving":[0],"a":[1,174],"three-dimensional":[2],"(3D)":[3],"scene":[4,67],"with":[5,31,42,149,173],"multiple":[6,193],"objects":[7,106,189],"while":[8,153,195],"moving":[9],"indoors":[10],"is":[11,205],"essential":[12],"for":[13,18,39,63,155],"vision-based":[14],"mobile":[15],"cobots,":[16],"especially":[17],"enhancing":[19],"their":[20],"manipulation":[21],"tasks.":[22],"In":[23,116],"this":[24],"work,":[25],"we":[26,56,128,162],"present":[27],"an":[28,58],"end-to-end":[29],"pipeline":[30,166],"instance":[32],"segmentation,":[33,68],"feature":[34,126],"matching,":[35],"and":[36,45,197],"point-set":[37],"registration":[38],"egocentric":[40],"robots":[41],"binocular":[43],"vision,":[44],"demonstrate":[46],"the":[47,52,113,137,168,199],"robot's":[48],"grasping":[49],"capability":[50],"through":[51,86],"proposed":[53,165],"pipeline.":[54],"First,":[55],"design":[57],"RGB":[59,110],"image-based":[60],"segmentation":[61],"approach":[62],"single-view":[64],"3D":[65,78,91,125,132],"semantic":[66],"leveraging":[69],"common":[70],"object":[71,84],"classes":[72],"in":[73,109,124],"2D":[74],"datasets":[75],"to":[76,118],"encapsulate":[77],"points":[79],"into":[80],"point":[81,97,133,159],"clouds":[82,98],"of":[83,93,107,121,190],"instances":[85],"corresponding":[87],"depth":[88],"maps.":[89],"Next,":[90],"correspondences":[92,152],"two":[94],"consecutive":[95],"segmented":[96],"are":[99],"extracted":[100],"based":[101,135],"on":[102,136,167],"matched":[103],"keypoints":[104],"between":[105,158],"interest":[108],"images":[111],"from":[112],"prior":[114],"step.":[115],"addition,":[117],"be":[119],"aware":[120],"spatial":[122],"changes":[123],"distribution,":[127],"also":[129],"weigh":[130],"each":[131],"pair":[134],"estimated":[138],"distribution":[139],"using":[140],"kernel":[141],"density":[142],"estimation":[143],"(KDE),":[144],"which":[145],"subsequently":[146],"gives":[147],"robustness":[148],"less":[150],"central":[151],"solving":[154],"rigid":[156],"transformations":[157],"clouds.":[160],"Finally,":[161],"test":[163],"our":[164,185],"7-DOF":[169],"dual-arm":[170],"Baxter":[171],"robot":[172,186],"mounted":[175],"Intel":[176],"RealSense":[177],"D435i":[178],"RGB-D":[179],"camera.":[180],"The":[181,202],"result":[182],"shows":[183],"that":[184],"can":[187],"segment":[188],"interest,":[191],"register":[192],"views":[194],"moving,":[196],"grasp":[198],"target":[200],"object.":[201],"source":[203],"code":[204],"available":[206],"at":[207],"https://github.com/mkhangg/semantic_scene_perception.":[208]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2024-02-21T00:00:00"}
