{"id":"https://openalex.org/W4401417460","doi":"https://doi.org/10.1109/icra57147.2024.10610193","title":"Open-Fusion: Real-time Open-Vocabulary 3D Mapping and Queryable Scene Representation","display_name":"Open-Fusion: Real-time Open-Vocabulary 3D Mapping and Queryable Scene Representation","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401417460","doi":"https://doi.org/10.1109/icra57147.2024.10610193"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610193","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610193","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089288624","display_name":"Kashu Yamazaki","orcid":"https://orcid.org/0000-0001-6569-6860"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kashu Yamazaki","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108918698","display_name":"Taisei Hanyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taisei Hanyu","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109007400","display_name":"Khoa Vo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khoa Vo","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051302276","display_name":"Thang M. Pham","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thang Pham","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086350893","display_name":"Minh C. Tran","orcid":"https://orcid.org/0000-0003-2622-1365"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minh Tran","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043587037","display_name":"Gianfranco Doretto","orcid":"https://orcid.org/0000-0002-8921-6646"},"institutions":[{"id":"https://openalex.org/I12097938","display_name":"West Virginia University","ror":"https://ror.org/011vxgd24","country_code":"US","type":"education","lineage":["https://openalex.org/I12097938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gianfranco Doretto","raw_affiliation_strings":["West Virginia University,Department of CSCE,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"West Virginia University,Department of CSCE,USA","institution_ids":["https://openalex.org/I12097938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035656066","display_name":"Anh Phong Nguyen","orcid":"https://orcid.org/0000-0002-6473-1780"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anh Nguyen","raw_affiliation_strings":["University of Liverpool,Department of CS,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Liverpool,Department of CS,UK","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023725893","display_name":"Ngan Le","orcid":"https://orcid.org/0000-0003-2571-0511"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ngan Le","raw_affiliation_strings":["University of Arkansas,AICV Lab,Department of EECS,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Arkansas,AICV Lab,Department of EECS,USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5089288624"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.1172,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96521262,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"9411","last_page":"9417"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7034738659858704},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5360947251319885},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.48365533351898193},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.44231465458869934},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4397696852684021},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4183872938156128},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38997456431388855},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.10959747433662415},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09646698832511902},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0639013946056366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7034738659858704},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5360947251319885},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.48365533351898193},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.44231465458869934},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4397696852684021},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4183872938156128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38997456431388855},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.10959747433662415},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09646698832511902},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0639013946056366},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610193","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610193","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1923184257","https://openalex.org/W2557169239","https://openalex.org/W2594519801","https://openalex.org/W2616247523","https://openalex.org/W2904904022","https://openalex.org/W2949830259","https://openalex.org/W2970641574","https://openalex.org/W2991453722","https://openalex.org/W3003372926","https://openalex.org/W3102564565","https://openalex.org/W3206072662","https://openalex.org/W4226452378","https://openalex.org/W4282919422","https://openalex.org/W4312424618","https://openalex.org/W4312563428","https://openalex.org/W4312956471","https://openalex.org/W4312960937","https://openalex.org/W4313136445","https://openalex.org/W4318718936","https://openalex.org/W4365606129","https://openalex.org/W4383108296","https://openalex.org/W4383108895","https://openalex.org/W4383994387","https://openalex.org/W4385430678","https://openalex.org/W4385431115","https://openalex.org/W4386075819","https://openalex.org/W4386083015","https://openalex.org/W4390872744","https://openalex.org/W4390874575","https://openalex.org/W4390874578","https://openalex.org/W6764040762","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6802517928","https://openalex.org/W6811013733","https://openalex.org/W6811433417","https://openalex.org/W6839745749","https://openalex.org/W6841082114","https://openalex.org/W6849177959","https://openalex.org/W6851607685","https://openalex.org/W6853628691"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W3167549738","https://openalex.org/W2381983017"],"abstract_inverted_index":{"Precise":[0],"3D":[1,35,69,91,107,118,146],"environmental":[2],"mapping":[3,36],"with":[4,89],"semantics":[5],"is":[6],"essential":[7],"in":[8],"robotics.":[9],"Existing":[10],"methods":[11,129],"often":[12],"rely":[13],"on":[14,122,164],"pre-defined":[15],"concepts":[16,152],"during":[17],"training":[18],"or":[19],"are":[20,86],"time-intensive":[21],"when":[22],"generating":[23],"semantic":[24,57],"maps.":[25,84],"This":[26],"paper":[27],"presents":[28],"Open-Fusion,":[29],"an":[30,96],"approach":[31],"for":[32,55,67,109,116],"real-time":[33,145],"open-vocabulary":[34],"and":[37,59,80,142,153],"queryable":[38],"scene":[39,70,147],"representation":[40],"using":[41,95],"RGB-D":[42],"data.":[43],"Open-Fusion":[44,103],"harnesses":[45],"the":[46,61,74,90,114,123,137,158,162],"power":[47],"of":[48,139],"a":[49],"pretrained":[50],"vision-language":[51],"foundation":[52],"model":[53],"(VLFM)":[54],"open-set":[56],"comprehension":[58,148],"employs":[60],"Truncated":[62],"Signed":[63],"Distance":[64],"Function":[65],"(TSDF)":[66],"swift":[68],"reconstruction.":[71],"By":[72],"leveraging":[73],"VLFM,":[75],"we":[76],"extract":[77],"region-based":[78,140],"embeddings":[79],"their":[81],"associated":[82],"confidence":[83],"These":[85],"then":[87],"integrated":[88],"knowledge":[92],"from":[93],"TSDF":[94],"enhanced":[97],"Hungarian-based":[98],"feature-matching":[99],"mechanism.":[100],"In":[101],"particular,":[102],"delivers":[104],"outstanding":[105],"annotation-free":[106],"segmentation":[108],"open":[110],"vocabulary":[111],"query":[112],"without":[113],"need":[115],"additional":[117],"training.":[119],"Benchmark":[120],"tests":[121],"ScanNet":[124],"dataset":[125],"against":[126],"leading":[127],"zero-shot":[128],"highlight":[130],"Open-Fusion\u2019s":[131],"superiority.":[132],"Furthermore,":[133],"it":[134],"seamlessly":[135],"combines":[136],"strengths":[138],"VLFM":[141],"TSDF,":[143],"facilitating":[144],"that":[149],"includes":[150],"object":[151],"open-world":[154],"semantics.":[155],"We":[156],"encourage":[157],"readers":[159],"to":[160],"view":[161],"demos":[163],"our":[165],"project":[166],"page:":[167],"https://uark-aicv.github.io/OpenFusion":[168]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":5}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
