{"id":"https://openalex.org/W4387969417","doi":"https://doi.org/10.1145/3581783.3611902","title":"Dense Object Grounding in 3D Scenes","display_name":"Dense Object Grounding in 3D Scenes","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387969417","doi":"https://doi.org/10.1145/3581783.3611902"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3611902","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072986859","display_name":"Wencan Huang","orcid":"https://orcid.org/0000-0002-1555-3674"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wencan Huang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1555-3674","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078220957","display_name":"Daizong Liu","orcid":"https://orcid.org/0000-0001-8179-4508"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daizong Liu","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8179-4508","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059045087","display_name":"Wei Hu","orcid":"https://orcid.org/0000-0002-9860-0922"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Hu","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9860-0922","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072986859"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":1.2952,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82861319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5017","last_page":"5026"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8097514510154724},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7142005562782288},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.6439441442489624},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6138423085212708},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.5782895684242249},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5668309926986694},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5645090341567993},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5209773182868958},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5067747235298157},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47895270586013794},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.43787187337875366},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4175393581390381},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4044959545135498}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8097514510154724},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7142005562782288},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.6439441442489624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6138423085212708},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.5782895684242249},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5668309926986694},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5645090341567993},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5209773182868958},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5067747235298157},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47895270586013794},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.43787187337875366},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4175393581390381},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4044959545135498},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3611902","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3611902","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6000000238418579}],"awards":[{"id":"https://openalex.org/G3604809996","display_name":null,"funder_award_id":"61972009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2489434015","https://openalex.org/W2594519801","https://openalex.org/W2747329762","https://openalex.org/W2907143950","https://openalex.org/W2962766617","https://openalex.org/W2963109634","https://openalex.org/W2979727876","https://openalex.org/W2988715931","https://openalex.org/W3034655362","https://openalex.org/W3034949383","https://openalex.org/W3034952258","https://openalex.org/W3092739351","https://openalex.org/W3095974555","https://openalex.org/W3096609285","https://openalex.org/W3112077297","https://openalex.org/W3133833192","https://openalex.org/W3138516171","https://openalex.org/W3140398265","https://openalex.org/W3159619744","https://openalex.org/W3172752666","https://openalex.org/W3174572181","https://openalex.org/W3175082063","https://openalex.org/W3175234951","https://openalex.org/W3179868941","https://openalex.org/W3182910454","https://openalex.org/W3203949114","https://openalex.org/W3206171352","https://openalex.org/W4214526701","https://openalex.org/W4214624153","https://openalex.org/W4214684415","https://openalex.org/W4214773923","https://openalex.org/W4221166385","https://openalex.org/W4221167937","https://openalex.org/W4285606658","https://openalex.org/W4310286406","https://openalex.org/W4312274934","https://openalex.org/W4312385518","https://openalex.org/W4312749817","https://openalex.org/W4312852845","https://openalex.org/W4386076628","https://openalex.org/W6600384961"],"related_works":["https://openalex.org/W2377059580","https://openalex.org/W4200355488","https://openalex.org/W127000293","https://openalex.org/W3215892509","https://openalex.org/W2928616779","https://openalex.org/W2412592434","https://openalex.org/W4244602709","https://openalex.org/W594987446","https://openalex.org/W2012131063","https://openalex.org/W2010523086"],"abstract_inverted_index":{"Localizing":[0],"objects":[1,66,92,116,147,199],"in":[2,20,67,94,118,129,200,203],"3D":[3,42,70,82,135,161,252,272],"scenes":[4],"according":[5],"to":[6,48,88,175,193,221,241,250],"the":[7,21,38,119,134,191,201,217],"semantics":[8],"of":[9,23,40,64,105,133,144,186,198,237],"a":[10,15,49,77,95,101,130,154,169,204,210],"given":[11],"natural":[12],"language":[13],"is":[14],"fundamental":[16],"yet":[17],"important":[18],"task":[19],"field":[22],"multimedia":[24],"understanding,":[25],"which":[26,56],"benefits":[27],"various":[28],"real-world":[29],"applications":[30],"such":[31,139],"as":[32],"robotics":[33],"and":[34,59,126,141,245,263,276],"autonomous":[35],"driving.":[36],"However,":[37],"majority":[39],"existing":[41],"object":[43,110,219],"grounding":[44,178,229,274],"methods":[45,275],"are":[46,122],"restricted":[47],"single-sentence":[50],"input":[51],"describing":[52],"an":[53],"individual":[54],"object,":[55],"cannot":[57],"comprehend":[58],"reason":[60],"more":[61,68,96,149],"contextualized":[62],"descriptions":[63],"multiple":[65,91],"practical":[69],"cases.":[71],"To":[72,137],"this":[73],"end,":[74],"we":[75,112,152,166,208,233],"introduce":[76],"new":[78],"challenging":[79,259],"task,":[80],"called":[81],"Dense":[83],"Object":[84],"Grounding":[85],"(3D":[86],"DOG),":[87],"jointly":[89],"localize":[90],"described":[93,117],"complicated":[97],"paragraph":[98,121,202],"rather":[99],"than":[100],"single":[102],"sentence.":[103],"Instead":[104],"naively":[106],"localizing":[107],"each":[108,181],"sentence-guided":[109],"independently,":[111],"found":[113],"that":[114,215,266],"dense":[115],"same":[120],"often":[123],"semantically":[124],"related":[125],"spatially":[127],"located":[128],"focused":[131],"region":[132],"scene.":[136],"explore":[138],"semantic":[140,196],"spatial":[142,248],"relationships":[143,197],"densely":[145],"referred":[146],"for":[148,160,180,225],"accurate":[150],"localization,":[151],"propose":[153],"novel":[155],"Stacked":[156],"Transformer":[157],"based":[158],"framework":[159],"DOG,":[162],"named":[163],"3DOGSFormer.":[164],"Specifically,":[165],"first":[167],"devise":[168],"contextual":[170,188],"query-driven":[171],"local":[172,218],"transformer":[173,213],"decoder":[174,214],"generate":[176],"initial":[177,228],"proposals":[179],"target":[182],"object.":[183],"The":[184],"design":[185],"these":[187],"queries":[189],"enables":[190],"model":[192],"capture":[194],"linguistic":[195],"lightweight":[205],"manner.":[206],"Then,":[207],"employ":[209],"proposal-guided":[211,238],"global":[212],"exploits":[216],"features":[220],"learn":[222],"their":[223,277],"correlation":[224],"further":[226],"refining":[227],"proposals.":[230],"In":[231],"particular,":[232],"develop":[234],"two":[235],"types":[236],"attention":[239],"layers":[240],"encode":[242],"both":[243],"explicit":[244],"implicit":[246],"pairwise":[247],"relations":[249],"enhance":[251],"relation":[253],"understanding.":[254],"Extensive":[255],"experiments":[256],"on":[257],"three":[258],"benchmarks":[260],"(Nr3D,":[261],"Sr3D,":[262],"ScanRefer)":[264],"show":[265],"our":[267],"proposed":[268],"3DOGSFormer":[269],"outperforms":[270],"state-of-the-art":[271],"single-object":[273],"dense-object":[278],"variants":[279],"by":[280],"significant":[281],"margins.":[282]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
