{"id":"https://openalex.org/W4416748869","doi":"https://doi.org/10.1109/iros60139.2025.11247697","title":"A Coarse-to-Fine Approach to Multi-Modality 3D Occupancy Grounding","display_name":"A Coarse-to-Fine Approach to Multi-Modality 3D Occupancy Grounding","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748869","doi":"https://doi.org/10.1109/iros60139.2025.11247697"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042482076","display_name":"Zhan Shi","orcid":"https://orcid.org/0000-0002-4812-8770"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]},{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]}],"countries":["CN","ES"],"is_corresponding":true,"raw_author_name":"Zhan Shi","raw_affiliation_strings":["College of Software Technology, Zhejiang University"],"affiliations":[{"raw_affiliation_string":"College of Software Technology, Zhejiang University","institution_ids":["https://openalex.org/I76130692","https://openalex.org/I97750245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037347051","display_name":"Song Wang","orcid":"https://orcid.org/0000-0002-8758-7988"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Song Wang","raw_affiliation_strings":["College of Computer Science, Zhejiang University,Hangzhou,China,310027"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University,Hangzhou,China,310027","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102725442","display_name":"Junbo Chen","orcid":"https://orcid.org/0000-0002-3696-2266"},"institutions":[{"id":"https://openalex.org/I4210126152","display_name":"Ospedale Santa Maria della Misericordia di Udine","ror":"https://ror.org/02zpc2253","country_code":"IT","type":"healthcare","lineage":["https://openalex.org/I4210126152"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Junbo Chen","raw_affiliation_strings":["Udeer.ai,Hangzhou,China,310000"],"affiliations":[{"raw_affiliation_string":"Udeer.ai,Hangzhou,China,310000","institution_ids":["https://openalex.org/I4210126152"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108119533","display_name":"Jianke Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianke Zhu","raw_affiliation_strings":["College of Computer Science, Zhejiang University,Hangzhou,China,310027"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Zhejiang University,Hangzhou,China,310027","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5042482076"],"corresponding_institution_ids":["https://openalex.org/I76130692","https://openalex.org/I97750245"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36921218,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"858","last_page":"865"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8252999782562256,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8252999782562256,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08399999886751175,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.010300000198185444,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.7926999926567078},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.609499990940094},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5579000115394592},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.5519999861717224},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.5324000120162964},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5133000016212463},{"id":"https://openalex.org/keywords/occupancy-grid-mapping","display_name":"Occupancy grid mapping","score":0.483599990606308},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4300000071525574},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.36410000920295715}],"concepts":[{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.7926999926567078},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6690000295639038},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118999719619751},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.609499990940094},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5579000115394592},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.5519999861717224},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.5324000120162964},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5133000016212463},{"id":"https://openalex.org/C57077369","wikidata":"https://www.wikidata.org/wiki/Q7075747","display_name":"Occupancy grid mapping","level":4,"score":0.483599990606308},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47130000591278076},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4300000071525574},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.36410000920295715},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3540000021457672},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.313400000333786},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247697","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247697","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1667249920","https://openalex.org/W2558535589","https://openalex.org/W2601564443","https://openalex.org/W2795587607","https://openalex.org/W2904910963","https://openalex.org/W2911486422","https://openalex.org/W2946086442","https://openalex.org/W2962764817","https://openalex.org/W2970603850","https://openalex.org/W2984121207","https://openalex.org/W2986755220","https://openalex.org/W2986803748","https://openalex.org/W2987734933","https://openalex.org/W3035574168","https://openalex.org/W3037533539","https://openalex.org/W3095974555","https://openalex.org/W3096609285","https://openalex.org/W3107521863","https://openalex.org/W3163747765","https://openalex.org/W3203949114","https://openalex.org/W3206171352","https://openalex.org/W3215584334","https://openalex.org/W4214490042","https://openalex.org/W4214684415","https://openalex.org/W4312385518","https://openalex.org/W4312523916","https://openalex.org/W4312565984","https://openalex.org/W4312749817","https://openalex.org/W4312894406","https://openalex.org/W4386076400","https://openalex.org/W4390872638","https://openalex.org/W4390874087","https://openalex.org/W4393149498","https://openalex.org/W4393156227","https://openalex.org/W4400648747","https://openalex.org/W4402713134","https://openalex.org/W4402716047","https://openalex.org/W4403003250","https://openalex.org/W4403488668","https://openalex.org/W4409263336","https://openalex.org/W4410739005","https://openalex.org/W4413144952","https://openalex.org/W4413947031"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"grounding":[1,29,67,94,107,147,155],"aims":[2],"at":[3,189],"identifying":[4],"objects":[5],"or":[6],"regions":[7],"in":[8,23,53,68],"a":[9,47,62,133,146,153,158],"scene":[10],"based":[11],"on":[12,33,73,171,181],"natural":[13,79],"language":[14,80],"descriptions,":[15],"which":[16],"is":[17,187],"essential":[18],"for":[19,64,104,136,142,149],"spatially":[20],"aware":[21],"perception":[22,89],"autonomous":[24],"driving.":[25],"However,":[26],"existing":[27,179],"visual":[28],"tasks":[30],"typically":[31],"depend":[32],"bounding":[34,48],"boxes":[35],"that":[36,175],"often":[37],"fail":[38],"to":[39,91,119,128],"capture":[40],"fine-grained":[41],"details.":[42],"Not":[43],"all":[44],"voxels":[45],"within":[46],"box":[49],"are":[50],"occupied,":[51],"resulting":[52],"inaccurate":[54],"object":[55,88,121],"representations.":[56],"To":[57],"address":[58],"this,":[59],"we":[60,97],"introduce":[61],"benchmark":[63,173],"3D":[65,105,182],"occupancy":[66,83,106,124,140,183],"challenging":[69],"outdoor":[70],"scenes.":[71],"Built":[72],"the":[74,92,172],"nuScenes":[75],"dataset,":[76],"it":[77],"fuses":[78],"with":[81],"voxel-level":[82],"annotations,":[84],"offering":[85],"more":[86],"precise":[87],"compared":[90],"traditional":[93],"task.":[95],"Moreover,":[96],"propose":[98],"GroundingOcc,":[99],"an":[100,139],"end-to-end":[101],"model":[102,167],"designed":[103],"through":[108],"multimodal":[109,134],"learning.":[110],"It":[111],"combines":[112],"visual,":[113],"textual,":[114],"and":[115,123,145,157],"point":[116],"cloud":[117],"features":[118],"predict":[120],"location":[122],"information":[125],"from":[126],"coarse":[127],"fine.":[129],"Specifically,":[130],"GroundingOcc":[131],"comprises":[132],"encoder":[135],"feature":[137],"extraction,":[138],"head":[141,148],"voxel-wise":[143],"predictions,":[144],"refining":[150],"localization.":[151],"Additionally,":[152],"2D":[154],"module":[156,161],"depth":[159],"estimation":[160],"enhance":[162],"geometric":[163],"understanding,":[164],"thereby":[165],"boosting":[166],"performance.":[168],"Extensive":[169],"experiments":[170],"demonstrate":[174],"our":[176],"method":[177],"outperforms":[178],"baselines":[180],"grounding.":[184],"The":[185],"dataset":[186],"available":[188],"https://github.com/RONINGOD/GroundingOcc.":[190]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
