{"id":"https://openalex.org/W4390120069","doi":"https://doi.org/10.1109/tpami.2023.3345880","title":"MsSVT++: Mixed-Scale Sparse Voxel Transformer With Center Voting for 3D Object Detection","display_name":"MsSVT++: Mixed-Scale Sparse Voxel Transformer With Center Voting for 3D Object Detection","publication_year":2023,"publication_date":"2023-12-22","ids":{"openalex":"https://openalex.org/W4390120069","doi":"https://doi.org/10.1109/tpami.2023.3345880","pmid":"https://pubmed.ncbi.nlm.nih.gov/38133980"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2023.3345880","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3345880","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100448292","display_name":"Jianan Li","orcid":"https://orcid.org/0000-0002-6936-9485"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianan Li","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009887040","display_name":"Shaocong Dong","orcid":"https://orcid.org/0000-0001-6152-9299"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaocong Dong","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026328907","display_name":"Lihe Ding","orcid":"https://orcid.org/0000-0003-1976-9496"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lihe Ding","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019242095","display_name":"Tingfa Xu","orcid":"https://orcid.org/0000-0001-5452-2662"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]},{"id":"https://openalex.org/I50632499","display_name":"Chongqing University of Technology","ror":"https://ror.org/04vgbd477","country_code":"CN","type":"education","lineage":["https://openalex.org/I50632499"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tingfa Xu","raw_affiliation_strings":["Beijing Institute of Technology, Beijing, China","Beijing Institute of Technology Chongqing Innovation Center, Chongqing, China","Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Beijing Institute of Technology Chongqing Innovation Center, Chongqing, China","institution_ids":["https://openalex.org/I50632499","https://openalex.org/I125839683"]},{"raw_affiliation_string":"Key Laboratory of Photoelectronic Imaging Technology and System, Ministry of Education of China, Beijing, China","institution_ids":["https://openalex.org/I1327237609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100448292"],"corresponding_institution_ids":["https://openalex.org/I125839683","https://openalex.org/I1327237609"],"apc_list":null,"apc_paid":null,"fwci":2.3838,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.90897939,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"46","issue":"5","first_page":"3736","last_page":"3752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/voxel","display_name":"Voxel","score":0.7688965797424316},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7605528831481934},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.6459872722625732},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5767063498497009},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.4883362948894501},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4688386023044586},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4515003263950348},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44232454895973206},{"id":"https://openalex.org/keywords/divide-and-conquer-algorithms","display_name":"Divide and conquer algorithms","score":0.42645013332366943},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.41205182671546936},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1739909052848816},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.14652463793754578}],"concepts":[{"id":"https://openalex.org/C54170458","wikidata":"https://www.wikidata.org/wiki/Q663554","display_name":"Voxel","level":2,"score":0.7688965797424316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7605528831481934},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.6459872722625732},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5767063498497009},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.4883362948894501},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4688386023044586},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4515003263950348},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44232454895973206},{"id":"https://openalex.org/C71559656","wikidata":"https://www.wikidata.org/wiki/Q671298","display_name":"Divide and conquer algorithms","level":2,"score":0.42645013332366943},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.41205182671546936},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1739909052848816},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.14652463793754578},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2023.3345880","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3345880","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38133980","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38133980","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4360217077","display_name":null,"funder_award_id":"62101032","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":77,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1644641054","https://openalex.org/W2115579991","https://openalex.org/W2194775991","https://openalex.org/W2211722331","https://openalex.org/W2555618208","https://openalex.org/W2565639579","https://openalex.org/W2798965597","https://openalex.org/W2897529137","https://openalex.org/W2949708697","https://openalex.org/W2963083779","https://openalex.org/W2963341956","https://openalex.org/W2963351448","https://openalex.org/W2963400571","https://openalex.org/W2963446712","https://openalex.org/W2963727135","https://openalex.org/W2963925437","https://openalex.org/W2968296999","https://openalex.org/W2988715931","https://openalex.org/W2995854419","https://openalex.org/W3004351857","https://openalex.org/W3008105217","https://openalex.org/W3034314779","https://openalex.org/W3034429258","https://openalex.org/W3035172746","https://openalex.org/W3035346742","https://openalex.org/W3111535274","https://openalex.org/W3118341329","https://openalex.org/W3131500599","https://openalex.org/W3134233478","https://openalex.org/W3138516171","https://openalex.org/W3153465022","https://openalex.org/W3167095230","https://openalex.org/W3167732492","https://openalex.org/W3175563878","https://openalex.org/W3192424468","https://openalex.org/W3201205318","https://openalex.org/W3203701986","https://openalex.org/W3205005447","https://openalex.org/W3209005318","https://openalex.org/W4214755140","https://openalex.org/W4214777292","https://openalex.org/W4225871896","https://openalex.org/W4226344270","https://openalex.org/W4310078553","https://openalex.org/W4312294656","https://openalex.org/W4312307873","https://openalex.org/W4312437143","https://openalex.org/W4312501532","https://openalex.org/W4312546175","https://openalex.org/W4312616477","https://openalex.org/W4312617306","https://openalex.org/W4312916565","https://openalex.org/W4312976258","https://openalex.org/W4313014889","https://openalex.org/W4313160444","https://openalex.org/W4313168566","https://openalex.org/W4319299723","https://openalex.org/W4361274151","https://openalex.org/W4380763527","https://openalex.org/W4385245566","https://openalex.org/W4386075636","https://openalex.org/W4390874386","https://openalex.org/W6631190155","https://openalex.org/W6755742144","https://openalex.org/W6763422710","https://openalex.org/W6771483569","https://openalex.org/W6775495027","https://openalex.org/W6784094891","https://openalex.org/W6785213549","https://openalex.org/W6801659397","https://openalex.org/W6801880476","https://openalex.org/W6802311648","https://openalex.org/W6810653034","https://openalex.org/W6811002592","https://openalex.org/W6840443303","https://openalex.org/W6846095257"],"related_works":["https://openalex.org/W2352794675","https://openalex.org/W1539994214","https://openalex.org/W2051228988","https://openalex.org/W4247094814","https://openalex.org/W108531593","https://openalex.org/W2111712077","https://openalex.org/W8810113","https://openalex.org/W2158636562","https://openalex.org/W2162911572","https://openalex.org/W2019464130"],"abstract_inverted_index":{"Accurate":[0],"3D":[1,110],"object":[2,13,188],"detection":[3],"in":[4,12,18,109],"large-scale":[5],"outdoor":[6],"scenes,":[7],"characterized":[8],"by":[9],"considerable":[10],"variations":[11],"scales,":[14],"necessitates":[15],"features":[16],"rich":[17],"both":[19,57],"long-range":[20,33],"and":[21,120,124],"fine-grained":[22,39],"information.":[23],"While":[24],"recent":[25],"detectors":[26],"have":[27],"utilized":[28],"window-based":[29,107],"transformers":[30],"to":[31,37,79,93],"model":[32],"dependencies,":[34],"they":[35],"tend":[36],"overlook":[38],"details.":[40],"To":[41,98,159],"bridge":[42],"this":[43,161],"gap,":[44],"we":[45,113,163],"propose":[46],"MsSVT++,":[47,202],"an":[48,133],"innovative":[49],"Mixed-scale":[50],"Sparse":[51],"Voxel":[52],"Transformer":[53],"that":[54,140,169,193],"simultaneously":[55],"captures":[56],"types":[58],"of":[59,87,149,156,182,201],"information":[60,80,178],"through":[61],"a":[62,82,106,115,129,165],"divide-and-conquer":[63],"approach.":[64],"This":[65],"approach":[66],"involves":[67],"explicitly":[68],"dividing":[69],"attention":[70],"heads":[71],"into":[72],"multiple":[73],"groups,":[74],"each":[75],"responsible":[76],"for":[77],"attending":[78],"within":[81],"specific":[83],"range.":[84],"The":[85],"outputs":[86],"these":[88],"groups":[89],"are":[90,143],"subsequently":[91],"merged":[92],"obtain":[94],"final":[95],"mixed-scale":[96,176],"features.":[97],"mitigate":[99],"the":[100,138,147,153,180,183,199],"computational":[101],"complexity":[102],"associated":[103],"with":[104,175],"applying":[105],"transformer":[108],"voxel":[111,122],"space,":[112],"introduce":[114,164],"novel":[116],"Chessboard":[117],"Sampling":[118],"strategy":[119],"implement":[121],"sampling":[123],"gathering":[125],"operations":[126],"sparsely":[127],"using":[128],"hash":[130],"map.":[131],"Moreover,":[132],"important":[134],"challenge":[135],"stems":[136],"from":[137],"observation":[139],"non-empty":[141],"voxels":[142,173],"primarily":[144],"located":[145],"on":[146],"surface":[148],"objects,":[150,184],"which":[151],"impedes":[152],"accurate":[154],"estimation":[155],"bounding":[157],"boxes.":[158],"overcome":[160],"challenge,":[162],"Center":[166],"Voting":[167],"module":[168],"integrates":[170],"newly":[171],"voted":[172],"enriched":[174],"contextual":[177],"towards":[179],"centers":[181],"thereby":[185],"improving":[186],"precise":[187],"localization.":[189],"Extensive":[190],"experiments":[191],"demonstrate":[192],"our":[194],"single-stage":[195],"detector,":[196],"built":[197],"upon":[198],"foundation":[200],"consistently":[203],"delivers":[204],"exceptional":[205],"performance":[206],"across":[207],"diverse":[208],"datasets.":[209]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":5}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
