{"id":"https://openalex.org/W4390523398","doi":"https://doi.org/10.1145/3595916.3626405","title":"Reimagining 3D Visual Grounding: Instance Segmentation and Transformers for Fragmented Point Cloud Scenarios","display_name":"Reimagining 3D Visual Grounding: Instance Segmentation and Transformers for Fragmented Point Cloud Scenarios","publication_year":2023,"publication_date":"2023-12-06","ids":{"openalex":"https://openalex.org/W4390523398","doi":"https://doi.org/10.1145/3595916.3626405"},"language":"en","primary_location":{"id":"doi:10.1145/3595916.3626405","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626405","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626405","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626405","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035994850","display_name":"Zehan Tan","orcid":"https://orcid.org/0000-0002-0931-8985"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehan Tan","raw_affiliation_strings":["Fudan University, China"],"raw_orcid":"https://orcid.org/0000-0002-0931-8985","affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101865874","display_name":"Weidong Yang","orcid":"https://orcid.org/0000-0002-6473-9272"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Yang","raw_affiliation_strings":["Fudan University, China"],"raw_orcid":"https://orcid.org/0000-0002-6473-9272","affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072047780","display_name":"Zhiwei Wang","orcid":"https://orcid.org/0009-0004-6994-4146"},"institutions":[{"id":"https://openalex.org/I4210160637","display_name":"Gree (China)","ror":"https://ror.org/053zdpn86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210160637"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Wang","raw_affiliation_strings":["GREE ELECTRIC APPLIANCES, INC. OF ZHUHAI, China"],"raw_orcid":"https://orcid.org/0009-0004-6994-4146","affiliations":[{"raw_affiliation_string":"GREE ELECTRIC APPLIANCES, INC. OF ZHUHAI, China","institution_ids":["https://openalex.org/I4210160637"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3368,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.60665286,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.808198094367981},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.7616068720817566},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7615841627120972},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5334957242012024},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5108961462974548},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5095882415771484},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4636925458908081},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4491240978240967},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4228935241699219},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.41935718059539795},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4024209976196289},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34698763489723206},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.09534275531768799},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09490084648132324},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07733979821205139}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.808198094367981},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.7616068720817566},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7615841627120972},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5334957242012024},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5108961462974548},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5095882415771484},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4636925458908081},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4491240978240967},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4228935241699219},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.41935718059539795},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4024209976196289},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34698763489723206},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.09534275531768799},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09490084648132324},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07733979821205139},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3595916.3626405","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626405","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626405","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3595916.3626405","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626405","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626405","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390523398.pdf","grobid_xml":"https://content.openalex.org/works/W4390523398.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1903029394","https://openalex.org/W2021851106","https://openalex.org/W2104422351","https://openalex.org/W2167667767","https://openalex.org/W2558535589","https://openalex.org/W2565639579","https://openalex.org/W2594519801","https://openalex.org/W2963150697","https://openalex.org/W2964345792","https://openalex.org/W3043971245","https://openalex.org/W3100732527","https://openalex.org/W3135367836","https://openalex.org/W3159619744","https://openalex.org/W4236965008","https://openalex.org/W4247971859","https://openalex.org/W4287640426"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W915438175"],"abstract_inverted_index":{"This":[0],"work":[1],"introduces":[2],"a":[3,51,68,89,126],"pioneering,":[4],"engineerable":[5],"approach":[6,66],"to":[7],"3D":[8],"visual":[9],"localization(3DVG).":[10],"Current":[11],"challenges":[12],"for":[13,54,71,129],"2D":[14],"Visual":[15],"Grounding":[16],"(2DVG)":[17],"and":[18,29,40,62,78,95,102,122,138],"3DVG":[19,121],"are":[20,140],"summarized:":[21],"Absence":[22],"of":[23,32,43,88,108],"Depth":[24],"Information":[25],"in":[26,37,93],"2DVG,":[27],"Memory":[28],"Computational":[30],"Demands":[31],"Global":[33],"Point":[34],"Clouds,":[35],"Limitations":[36],"Dynamic":[38],"Scenarios,":[39],"Limited":[41],"Understanding":[42],"Spatial":[44],"Localization":[45],"Reference":[46],"Frames.":[47],"Our":[48],"solution":[49],"proposes":[50],"Re_3DVG":[52],"method":[53],"fragmented":[55],"point":[56],"cloud":[57],"scenarios.":[58],"Utilizing":[59],"instance":[60],"segmentation":[61],"transformer":[63],"models,":[64,124],"our":[65,109,116],"offers":[67],"potent":[69],"mechanism":[70],"establishing":[72,125],"robust":[73],"correspondences":[74],"between":[75],"text":[76],"queries":[77],"object":[79,100],"instances":[80],"within":[81,132],"the":[82,106],"shared":[83],"visible":[84],"range.":[85],"The":[86,135],"introduction":[87],"FragCloud3DRef":[90],"dataset,":[91],"grounded":[92],"ScanNet":[94],"supplemented":[96],"with":[97],"RGB":[98],"data,":[99],"segmentation,":[101],"textual":[103],"descriptions,":[104],"fortifies":[105],"effectiveness":[107],"proposed":[110],"model.":[111],"Experimental":[112],"outcomes":[113],"display":[114],"that":[115],"model":[117],"excels":[118],"beyond":[119],"conventional":[120],"2DVG":[123],"formidable":[127],"benchmark":[128],"future":[130],"research":[131],"this":[133],"discipline.":[134],"code":[136],"source":[137],"dataset":[139],"open":[141],"at":[142],"https://github.com/zehantan6970/Reimagining_3DVG.":[143]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
