{"id":"https://openalex.org/W7118183269","doi":"https://doi.org/10.1109/tpami.2025.3650478","title":"Hierarchical Context Alignment With Disentangled Geometric and Temporal Modeling for Semantic Occupancy Prediction","display_name":"Hierarchical Context Alignment With Disentangled Geometric and Temporal Modeling for Semantic Occupancy Prediction","publication_year":2026,"publication_date":"2026-01-05","ids":{"openalex":"https://openalex.org/W7118183269","doi":"https://doi.org/10.1109/tpami.2025.3650478","pmid":"https://pubmed.ncbi.nlm.nih.gov/41489959"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3650478","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3650478","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035550418","display_name":"B. Li","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bohan Li","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-6959-7517","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xin Jin","orcid":"https://orcid.org/0000-0002-1820-8358"},"institutions":[{"id":"https://openalex.org/I4394709157","display_name":"Eastern Institute of Technology, Ningbo","ror":"https://ror.org/036mbz113","country_code":null,"type":"education","lineage":["https://openalex.org/I4394709157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Jin","raw_affiliation_strings":["Ningbo Institute of Digital Twin, Eastern Institute of Technology, Ningbo, China"],"raw_orcid":"https://orcid.org/0000-0002-1820-8358","affiliations":[{"raw_affiliation_string":"Ningbo Institute of Digital Twin, Eastern Institute of Technology, Ningbo, China","institution_ids":["https://openalex.org/I4394709157"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiajun Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jiajun Deng","raw_affiliation_strings":["University of Adelaide (UoA), Adelaide, SA, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Adelaide (UoA), Adelaide, SA, Australia","institution_ids":["https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yasheng Sun","orcid":"https://orcid.org/0000-0002-0589-4424"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasheng Sun","raw_affiliation_strings":["Tokyo Institute of Technology, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-0589-4424","affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaofeng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Wang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049963367","display_name":"Wenjun Zeng","orcid":"https://orcid.org/0000-0003-2531-3137"},"institutions":[{"id":"https://openalex.org/I4210165339","display_name":"Ningbo Institute of Industrial Technology","ror":"https://ror.org/05nqg3g04","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165339"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjun Zeng","raw_affiliation_strings":["Ningbo Key Laboratory of Spatial Intelligence and Digital Derivative, Ningbo Institute of Digital Twin, Eastern Institute of Technology, Ningbo, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0003-2531-3137","affiliations":[{"raw_affiliation_string":"Ningbo Key Laboratory of Spatial Intelligence and Digital Derivative, Ningbo Institute of Digital Twin, Eastern Institute of Technology, Ningbo, Zhejiang, China","institution_ids":["https://openalex.org/I4210165339"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5035550418"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02988219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"48","issue":"5","first_page":"5388","last_page":"5404"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.26649999618530273,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.26649999618530273,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.20350000262260437,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.17649999260902405,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5881999731063843},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5708000063896179},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5497000217437744},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5250999927520752},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.5105999708175659},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.49720001220703125},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4912000000476837},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47269999980926514},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45249998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79830002784729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7416999936103821},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5881999731063843},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5708000063896179},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5497000217437744},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5250999927520752},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.5105999708175659},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.49720001220703125},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4912000000476837},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47269999980926514},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.414900004863739},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4011000096797943},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.39910000562667847},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3905999958515167},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3578999936580658},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.35690000653266907},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.325300008058548},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29660001397132874},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2754000127315521},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C104065381","wikidata":"https://www.wikidata.org/wiki/Q1002535","display_name":"Geometric modeling","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3650478","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3650478","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41489959","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41489959","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5543680787086487,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G3527258163","display_name":null,"funder_award_id":"62302246","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Camera-based":[0],"3D":[1,11],"Semantic":[2],"Occupancy":[3],"Prediction":[4],"(SOP)":[5],"is":[6],"crucial":[7],"for":[8,89,103,147,172],"understanding":[9],"complex":[10],"scenes":[12],"from":[13],"limited":[14],"2D":[15],"image":[16],"observations.":[17],"Existing":[18],"SOP":[19,93],"methods":[20],"typically":[21],"aggregate":[22],"contextual":[23,68],"features":[24,47],"to":[25,66,112],"assist":[26],"the":[27,45,49,61,98,114,121,158,177,187],"occupancy":[28],"representation":[29,74],"learning,":[30],"alleviating":[31],"issues":[32,43],"like":[33],"occlusion":[34],"or":[35],"ambiguity.":[36],"However,":[37],"these":[38],"solutions":[39],"often":[40],"face":[41],"misalignment":[42],"wherein":[44],"corresponding":[46],"at":[48],"same":[50],"position":[51],"across":[52],"different":[53,57],"frames":[54],"may":[55],"have":[56],"semantic":[58,173,184],"meanings":[59],"during":[60],"aggregation":[62],"process,":[63],"which":[64,106],"leads":[65],"unreliable":[67],"fusion":[69],"results":[70],"and":[71,100,133,142,155,161,182],"an":[72],"unstable":[73],"learning":[75],"process.":[76],"To":[77],"address":[78],"this":[79],"problem,":[80],"we":[81],"introduce":[82],"a":[83,90,125],"new":[84],"Hierarchical":[85],"context":[86,102],"alignment":[87,127,154],"paradigm":[88],"more":[91],"accurate":[92],"(Hi-SOP).":[94],"Hi-SOP":[95],"first":[96],"disentangles":[97],"geometric":[99,132,160],"temporal":[101,134,162],"separate":[104,135],"alignment,":[105,136],"two":[107],"branches":[108],"are":[109],"then":[110],"composed":[111],"enhance":[113],"reliability":[115],"of":[116,120,157],"SOP.":[117],"This":[118],"parsing":[119],"visual":[122],"input":[123],"into":[124],"local-global":[126],"hierarchy":[128],"includes:":[129],"(I)":[130],"disentangled":[131],"within":[137],"each":[138],"leverages":[139],"depth":[140],"confidence":[141],"camera":[143],"pose":[144],"as":[145],"prior":[146],"relevant":[148],"feature":[149],"matching":[150],"respectively;":[151],"(II)":[152],"global":[153],"composition":[156],"transformed":[159],"volumes":[163],"based":[164],"on":[165,176,186],"semantics":[166],"consistency.":[167],"Our":[168],"method":[169],"outperforms":[170],"SOTAs":[171],"scene":[174],"completion":[175],"SemanticKITTI":[178],"&":[179],"NuScenes-Occupancy":[180],"datasets":[181],"LiDAR":[183],"segmentation":[185],"NuScenes":[188],"dataset.":[189]},"counts_by_year":[],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2026-01-05T00:00:00"}
