{"id":"https://openalex.org/W4405354453","doi":"https://doi.org/10.48550/arxiv.2412.08774","title":"ProtoOcc: Accurate, Efficient 3D Occupancy Prediction Using Dual Branch Encoder-Prototype Query Decoder","display_name":"ProtoOcc: Accurate, Efficient 3D Occupancy Prediction Using Dual Branch Encoder-Prototype Query Decoder","publication_year":2024,"publication_date":"2024-12-11","ids":{"openalex":"https://openalex.org/W4405354453","doi":"https://doi.org/10.48550/arxiv.2412.08774"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.08774","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.08774","pdf_url":"https://arxiv.org/pdf/2412.08774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.08774","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100426073","display_name":"Jung Ho Kim","orcid":"https://orcid.org/0000-0003-4931-3553"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kim, Jungho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111114838","display_name":"Chengyang Kang","orcid":"https://orcid.org/0009-0009-4057-4483"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Changwon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028079288","display_name":"Dongyoung Lee","orcid":"https://orcid.org/0000-0002-6716-1023"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Dongyoung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107247482","display_name":"S. Choi","orcid":"https://orcid.org/0000-0001-6225-9876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Sehwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102839991","display_name":"Jun Won Choi","orcid":"https://orcid.org/0000-0002-3733-0148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Jun Won","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100426073"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9574999809265137,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9574999809265137,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9276000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9218999743461609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/occupancy","display_name":"Occupancy","score":0.7513015270233154},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6987206935882568},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6851186752319336},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6444604396820068},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3686493933200836},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13317731022834778},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07496443390846252}],"concepts":[{"id":"https://openalex.org/C160331591","wikidata":"https://www.wikidata.org/wiki/Q7075743","display_name":"Occupancy","level":2,"score":0.7513015270233154},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6987206935882568},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6851186752319336},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6444604396820068},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3686493933200836},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13317731022834778},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07496443390846252},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2412.08774","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.08774","pdf_url":"https://arxiv.org/pdf/2412.08774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.08774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.08774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.08774","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.08774","pdf_url":"https://arxiv.org/pdf/2412.08774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4405354453.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4282043467","https://openalex.org/W2105697914","https://openalex.org/W2202433167","https://openalex.org/W3093197249","https://openalex.org/W1540010871","https://openalex.org/W3023979140","https://openalex.org/W3177545769"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,146,164],"introduce":[4],"ProtoOcc,":[5],"a":[6,25,51,67,81,91,153],"novel":[7],"3D":[8,22,53,58,115,150],"occupancy":[9,16,151],"prediction":[10],"model":[11,180],"designed":[12],"to":[13,104,130,181],"predict":[14,149],"the":[15,37,43,86,96,106,114,136,157,166,179,184,195],"states":[17],"and":[18,42,60,76,177],"semantic":[19,27],"classes":[20],"of":[21,29,33,118,209],"voxels":[23],"through":[24,66],"deep":[26],"understanding":[28],"scenes.":[30],"ProtoOcc":[31,187],"consists":[32],"two":[34],"main":[35],"components:":[36],"Dual":[38],"Branch":[39],"Encoder":[40],"(DBE)":[41],"Prototype":[44,102,168],"Query":[45],"Decoder":[46],"(PQD).":[47],"The":[48,99],"DBE":[49],"produces":[50],"new":[52],"voxel":[54,59,97,116],"representation":[55,88],"by":[56,79,126],"combining":[57],"BEV":[61,87],"representations":[62],"across":[63],"multiple":[64],"scales":[65],"dual":[68],"branch":[69],"structure.":[70],"This":[71],"design":[72],"enhances":[73],"both":[74],"performance":[75,190],"computational":[77],"efficiency":[78],"providing":[80],"large":[82],"receptive":[83,93],"field":[84,94],"for":[85,95,144,159],"while":[89,121],"maintaining":[90],"smaller":[92],"representation.":[98],"PQD":[100],"introduces":[101],"Queries":[103],"accelerate":[105],"decoding":[107],"process.":[108],"Scene-Adaptive":[109,128],"Prototypes":[110,123,129],"are":[111,124],"derived":[112],"from":[113],"features":[117],"input":[119],"sample,":[120],"Scene-Agnostic":[122],"computed":[125],"applying":[127],"an":[131,206,213],"Exponential":[132],"Moving":[133],"Average":[134],"during":[135,183],"training":[137,185],"phase.":[138,186],"By":[139],"using":[140],"these":[141],"prototype-based":[142],"queries":[143],"decoding,":[145],"can":[147,219],"directly":[148],"in":[152],"single":[154],"step,":[155],"eliminating":[156],"need":[158],"iterative":[160],"Transformer":[161],"decoding.":[162],"Additionally,":[163],"propose":[165],"Robust":[167],"Learning,":[169],"which":[170],"injects":[171],"noise":[172],"into":[173],"prototype":[174],"generation":[175],"process":[176],"trains":[178],"denoise":[182],"achieves":[188],"state-of-the-art":[189],"with":[191,205],"45.02%":[192],"mIoU":[193,204],"on":[194,212],"Occ3D-nuScenes":[196],"benchmark.":[197],"For":[198],"single-frame":[199],"method,":[200],"it":[201],"reaches":[202],"39.56%":[203],"inference":[207],"speed":[208],"12.83":[210],"FPS":[211],"NVIDIA":[214],"RTX":[215],"3090.":[216],"Our":[217],"code":[218],"be":[220],"found":[221],"at":[222],"https://github.com/SPA-junghokim/ProtoOcc.":[223]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
