{"id":"https://openalex.org/W4414162015","doi":"https://doi.org/10.1109/tpami.2025.3609348","title":"MV2DFusion: Leveraging Modality-Specific Object Semantics for Multi-Modal 3D Detection","display_name":"MV2DFusion: Leveraging Modality-Specific Object Semantics for Multi-Modal 3D Detection","publication_year":2025,"publication_date":"2025-09-12","ids":{"openalex":"https://openalex.org/W4414162015","doi":"https://doi.org/10.1109/tpami.2025.3609348","pmid":"https://pubmed.ncbi.nlm.nih.gov/40938719"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3609348","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3609348","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101845790","display_name":"Zitian Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zitian Wang","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017829201","display_name":"Zehao Huang","orcid":"https://orcid.org/0000-0003-1653-208X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zehao Huang","raw_affiliation_strings":["Independent Researcher, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Independent Researcher, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056297901","display_name":"Yulu Gao","orcid":"https://orcid.org/0000-0002-3895-1288"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yulu Gao","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100742122","display_name":"Naiyan Wang","orcid":"https://orcid.org/0000-0002-0526-3331"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naiyan Wang","raw_affiliation_strings":["Independent Researcher, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Independent Researcher, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100330138","display_name":"Si Liu","orcid":"https://orcid.org/0000-0002-9180-2935"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Si Liu","raw_affiliation_strings":["Institute of Artificial Intelligence, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101845790"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":5.1136,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.95792428,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"48","issue":"1","first_page":"609","last_page":"623"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.7253999710083008},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.5486000180244446},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.5264999866485596},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.515999972820282},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5097000002861023},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4535999894142151},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4514999985694885},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.43230000138282776},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.42660000920295715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8084999918937683},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.7253999710083008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.671999990940094},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6227999925613403},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.5486000180244446},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.5264999866485596},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.515999972820282},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5097000002861023},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4535999894142151},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.42660000920295715},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.41760000586509705},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.397599995136261},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.36320000886917114},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35989999771118164},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.34540000557899475},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.326200008392334},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.273499995470047},{"id":"https://openalex.org/C71681937","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object-class detection","level":5,"score":0.27300000190734863},{"id":"https://openalex.org/C53073257","wikidata":"https://www.wikidata.org/wiki/Q7075021","display_name":"Object-oriented design","level":3,"score":0.2694000005722046},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.26429998874664307},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C126422989","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature detection (computer vision)","level":4,"score":0.2565999925136566}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3609348","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3609348","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40938719","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40938719","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2222512263","https://openalex.org/W2897529137","https://openalex.org/W2949708697","https://openalex.org/W2954174912","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2963727135","https://openalex.org/W2964062501","https://openalex.org/W2964241181","https://openalex.org/W2968296999","https://openalex.org/W2982770724","https://openalex.org/W3034314779","https://openalex.org/W3035346742","https://openalex.org/W3035461736","https://openalex.org/W3035574168","https://openalex.org/W3096609285","https://openalex.org/W3109395584","https://openalex.org/W3127743092","https://openalex.org/W3136022415","https://openalex.org/W3138516171","https://openalex.org/W3167095230","https://openalex.org/W3170030651","https://openalex.org/W3175563878","https://openalex.org/W3199093552","https://openalex.org/W3209639308","https://openalex.org/W3215100485","https://openalex.org/W4225793049","https://openalex.org/W4225986494","https://openalex.org/W4285606661","https://openalex.org/W4312312588","https://openalex.org/W4312443924","https://openalex.org/W4312707458","https://openalex.org/W4312894406","https://openalex.org/W4312953085","https://openalex.org/W4382464460","https://openalex.org/W4382466543","https://openalex.org/W4383066393","https://openalex.org/W4385245566","https://openalex.org/W4386076253","https://openalex.org/W4386076493","https://openalex.org/W4389665536","https://openalex.org/W4390872833","https://openalex.org/W4390874137","https://openalex.org/W4390874155","https://openalex.org/W4390874213","https://openalex.org/W4390874598","https://openalex.org/W4393149498","https://openalex.org/W4394994942","https://openalex.org/W4401415883","https://openalex.org/W4402716222","https://openalex.org/W4404520687"],"related_works":["https://openalex.org/W2385859805","https://openalex.org/W2530972254","https://openalex.org/W2374013449","https://openalex.org/W73545470","https://openalex.org/W2364381299","https://openalex.org/W2374430585","https://openalex.org/W3144423903","https://openalex.org/W2377397762","https://openalex.org/W2793967660","https://openalex.org/W627697492"],"abstract_inverted_index":{"The":[0],"rise":[1],"of":[2,57],"autonomous":[3],"vehicles":[4],"has":[5],"significantly":[6],"increased":[7],"the":[8,55,96,105,143],"demand":[9],"for":[10,137],"robust":[11],"3D":[12,33],"object":[13,87,107,113],"detection":[14,51,114,158],"systems.":[15],"While":[16],"cameras":[17],"and":[18,29,77,111,128,135,145],"LiDAR":[19,30],"sensors":[20],"each":[21],"offer":[22],"unique":[23],"advantages-cameras":[24],"provide":[25],"rich":[26],"texture":[27],"information":[28],"offers":[31],"precise":[32],"spatial":[34],"data-relying":[35],"on":[36,104,142],"a":[37,49,78],"single":[38,93],"modality":[39],"often":[40],"leads":[41],"to":[42,72,123],"performance":[43],"limitations.":[44],"This":[45],"paper":[46],"introduces":[47],"MV2DFusion,":[48],"multi-modal":[50],"framework":[52],"that":[53,149],"integrates":[54],"strengths":[56],"both":[58],"worlds":[59],"through":[60],"an":[61,68],"advanced":[62],"query-based":[63],"fusion":[64,98],"mechanism.":[65],"By":[66],"introducing":[67],"image":[69,127],"query":[70,81],"generator":[71],"align":[73],"with":[74,125],"image-specific":[75],"attributes":[76],"point":[79,129],"cloud":[80],"generator,":[82],"MV2DFusion":[83,150],"effectively":[84],"combines":[85],"modality-specific":[86],"semantics":[88],"without":[89],"biasing":[90],"toward":[91],"one":[92],"modality.":[94],"Then":[95],"sparse":[97],"process":[99],"can":[100],"be":[101],"accomplished":[102],"based":[103],"valuable":[106],"semantics,":[108],"ensuring":[109],"efficient":[110],"accurate":[112],"across":[115],"various":[116],"scenarios.":[117,159],"Our":[118],"framework's":[119],"flexibility":[120],"allows":[121],"it":[122],"integrate":[124],"any":[126],"cloud-based":[130],"detectors,":[131],"showcasing":[132],"its":[133],"adaptability":[134],"potential":[136],"future":[138],"advancements.":[139],"Extensive":[140],"evaluations":[141],"nuScenes":[144],"Argoverse2":[146],"datasets":[147],"demonstrate":[148],"achieves":[151],"state-of-the-art":[152],"performance,":[153],"particularly":[154],"excelling":[155],"in":[156],"long-range":[157]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
