{"id":"https://openalex.org/W4399115154","doi":"https://doi.org/10.48550/arxiv.2405.17429","title":"GaussianFormer: Scene as Gaussians for Vision-Based 3D Semantic Occupancy Prediction","display_name":"GaussianFormer: Scene as Gaussians for Vision-Based 3D Semantic Occupancy Prediction","publication_year":2024,"publication_date":"2024-05-27","ids":{"openalex":"https://openalex.org/W4399115154","doi":"https://doi.org/10.48550/arxiv.2405.17429"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.17429","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.17429","pdf_url":"https://arxiv.org/pdf/2405.17429","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.17429","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102942331","display_name":"Yuanhui Huang","orcid":"https://orcid.org/0000-0002-4774-7449"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Yuanhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006396086","display_name":"Wenzhao Zheng","orcid":"https://orcid.org/0000-0001-7188-3734"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Wenzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457275","display_name":"Yunpeng Zhang","orcid":"https://orcid.org/0000-0002-6697-1024"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yunpeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003189850","display_name":"Jie Zhou","orcid":"https://orcid.org/0000-0003-2406-3981"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460385","display_name":"Jiwen Lu","orcid":"https://orcid.org/0000-0002-6121-5529"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Jiwen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102942331"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.7671337127685547},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5911747813224792},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5590587258338928},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.46358346939086914},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08466640114784241}],"concepts":[{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.7671337127685547},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5911747813224792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5590587258338928},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.46358346939086914},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08466640114784241},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.17429","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.17429","pdf_url":"https://arxiv.org/pdf/2405.17429","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.17429","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.17429","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.17429","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.17429","pdf_url":"https://arxiv.org/pdf/2405.17429","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"3D":[0,7,70,74,105,122],"semantic":[1,75,88],"occupancy":[2,45,123],"prediction":[3],"aims":[4],"to":[5,55,68,120],"obtain":[6],"fine-grained":[8],"geometry":[9],"and":[10,16,46,52,86,99,110,144],"semantics":[11],"of":[12,24,44,49,58,84,104,163],"the":[13,22,42,47,96,102,128,140],"surrounding":[14],"scene":[15,38],"is":[17,168],"an":[18,65,115],"important":[19],"task":[20],"for":[21,131],"robustness":[23],"vision-centric":[25],"autonomous":[26],"driving.":[27],"Most":[28],"existing":[29],"methods":[30,157],"employ":[31],"dense":[32],"grids":[33],"such":[34],"as":[35,37],"voxels":[36],"representations,":[39],"which":[40,125],"ignore":[41],"sparsity":[43],"diversity":[48],"object":[50],"scales":[51],"thus":[53],"lead":[54],"unbalanced":[56],"allocation":[57],"resources.":[59],"To":[60],"address":[61],"this,":[62],"we":[63],"propose":[64,114],"object-centric":[66],"representation":[67],"describe":[69],"scenes":[71],"with":[72,155,158],"sparse":[73],"Gaussians":[76,106,130],"where":[77],"each":[78],"Gaussian":[79],"represents":[80],"a":[81,132],"flexible":[82],"region":[83],"interest":[85],"its":[87],"features.":[89],"We":[90,112,135],"aggregate":[91],"information":[92],"from":[93],"images":[94],"through":[95],"attention":[97],"mechanism":[98],"iteratively":[100],"refine":[101],"properties":[103],"including":[107],"position,":[108],"covariance,":[109],"semantics.":[111],"then":[113],"efficient":[116],"Gaussian-to-voxel":[117],"splatting":[118],"method":[119],"generate":[121],"predictions,":[124],"only":[126,159],"aggregates":[127],"neighboring":[129],"certain":[133],"position.":[134],"conduct":[136],"extensive":[137],"experiments":[138],"on":[139],"widely":[141],"adopted":[142],"nuScenes":[143],"KITTI-360":[145],"datasets.":[146],"Experimental":[147],"results":[148],"demonstrate":[149],"that":[150],"GaussianFormer":[151],"achieves":[152],"comparable":[153],"performance":[154],"state-of-the-art":[156],"17.8%":[160],"-":[161],"24.8%":[162],"their":[164],"memory":[165],"consumption.":[166],"Code":[167],"available":[169],"at:":[170],"https://github.com/huang-yh/GaussianFormer.":[171]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2024-05-29T00:00:00"}
