{"id":"https://openalex.org/W7128505576","doi":"https://doi.org/10.48550/arxiv.2602.08126","title":"MambaFusion: Adaptive State-Space Fusion for Multimodal 3D Object Detection","display_name":"MambaFusion: Adaptive State-Space Fusion for Multimodal 3D Object Detection","publication_year":2026,"publication_date":"2026-02-08","ids":{"openalex":"https://openalex.org/W7128505576","doi":"https://doi.org/10.48550/arxiv.2602.08126"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.08126","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055659576","display_name":"Venkatraman Narayanan","orcid":"https://orcid.org/0000-0003-0536-4420"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Narayanan, Venkatraman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120095766","display_name":"Bala Charitha Sai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sai, Bala","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125525546","display_name":"Rahul Ahuja","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahuja, Rahul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086687510","display_name":"Pratik Likhar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Likhar, Pratik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432990","display_name":"Varun Kumar","orcid":"https://orcid.org/0009-0000-3961-8439"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Varun Ravi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014764449","display_name":"Senthil Yogamani","orcid":"https://orcid.org/0000-0003-3755-4245"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yogamani, Senthil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5483999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5483999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.10830000042915344,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.054499998688697815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5511999726295471},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5109000205993652},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4959000051021576},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.49149999022483826},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4131999909877777},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3921999931335449},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.36079999804496765},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.35589998960494995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7487999796867371},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6854000091552734},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6761000156402588},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5511999726295471},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4959000051021576},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.49149999022483826},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4131999909877777},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3921999931335449},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.36079999804496765},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.32420000433921814},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.2694000005722046},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.08126","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.08126","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08126","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.08126","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reliable":[0],"3D":[1,34,77,168],"object":[2],"detection":[3,68],"is":[4],"fundamental":[5],"to":[6,88],"autonomous":[7,172],"driving,":[8],"and":[9,15,58,74,107,119,136,166],"multimodal":[10],"fusion":[11,41,109,161],"algorithms":[12],"using":[13],"cameras":[14],"LiDAR":[16,30],"remain":[17],"a":[18,32,65,123],"persistent":[19],"challenge.":[20],"Cameras":[21],"provide":[22],"dense":[23],"visual":[24],"cues":[25],"but":[26,36,47],"ill":[27],"posed":[28],"depth;":[29],"provides":[31],"precise":[33],"structure":[35],"sparse":[37],"coverage.":[38],"Existing":[39],"BEV-based":[40],"frameworks":[42],"have":[43,49],"made":[44],"good":[45],"progress,":[46],"they":[48],"difficulties":[50],"including":[51],"inefficient":[52],"context":[53,92],"modeling,":[54],"spatially":[55],"invariant":[56],"fusion,":[57],"reasoning":[59,129],"under":[60],"uncertainty.":[61],"We":[62],"introduce":[63],"MambaFusion,":[64],"unified":[66],"multi-modal":[67,102],"framework":[69,153],"that":[70,155],"achieves":[71],"efficient,":[72],"adaptive,":[73],"physically":[75],"grounded":[76],"perception.":[78],"MambaFusion":[79,139],"interleaves":[80],"selective":[81],"state-space":[82],"models":[83],"(SSMs)":[84],"with":[85,130,149,159],"windowed":[86],"transformers":[87],"propagate":[89],"the":[90],"global":[91],"in":[93],"linear":[94],"time":[95],"while":[96,147],"preserving":[97],"local":[98],"geometric":[99],"fidelity.":[100],"A":[101],"token":[103],"alignment":[104],"(MTA)":[105],"module":[106],"reliability-aware":[108],"gates":[110],"dynamically":[111],"re-weight":[112],"camera-LiDAR":[113],"features":[114],"based":[115],"on":[116,144],"spatial":[117],"confidence":[118],"calibration":[120],"consistency.":[121],"Finally,":[122],"structure-conditioned":[124],"diffusion":[125],"head":[126],"integrates":[127],"graph-based":[128],"uncertainty-aware":[131],"denoising,":[132],"enforcing":[133],"physical":[134],"plausibility,":[135],"calibrated":[137],"confidence.":[138],"establishes":[140],"new":[141],"state-of-the-art":[142],"performance":[143],"nuScenes":[145],"benchmarks":[146],"operating":[148],"linear-time":[150],"complexity.":[151],"The":[152],"demonstrates":[154],"coupling":[156],"SSM-based":[157],"efficiency":[158],"reliability-driven":[160],"yields":[162],"robust,":[163],"temporally":[164],"stable,":[165],"interpretable":[167],"perception":[169],"for":[170],"real-world":[171],"driving":[173],"systems.":[174]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-02-11T00:00:00"}
