{"id":"https://openalex.org/W7134809560","doi":"https://doi.org/10.1109/lra.2026.3671538","title":"StreamCMT: Prior-Guided Multimodal Temporal Fusion for Sparse 3D Object Detection","display_name":"StreamCMT: Prior-Guided Multimodal Temporal Fusion for Sparse 3D Object Detection","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134809560","doi":"https://doi.org/10.1109/lra.2026.3671538"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3671538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3671538","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121132557","display_name":"Yanliang Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I114234892","display_name":"Beijing Union University","ror":"https://ror.org/01hg31662","country_code":"CN","type":"education","lineage":["https://openalex.org/I114234892"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanliang Huang","raw_affiliation_strings":["College of Robotics, Beijing Union University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-6145-1114","affiliations":[{"raw_affiliation_string":"College of Robotics, Beijing Union University, Beijing, China","institution_ids":["https://openalex.org/I114234892"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055268135","display_name":"Yuansheng LIU","orcid":null},"institutions":[{"id":"https://openalex.org/I114234892","display_name":"Beijing Union University","ror":"https://ror.org/01hg31662","country_code":"CN","type":"education","lineage":["https://openalex.org/I114234892"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuansheng Liu","raw_affiliation_strings":["Department of Electronic Engineering, Beijing Union University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8343-6875","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Beijing Union University, Beijing, China","institution_ids":["https://openalex.org/I114234892"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5121132557"],"corresponding_institution_ids":["https://openalex.org/I114234892"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.47526632,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"5","first_page":"5358","last_page":"5365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6635000109672546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6635000109672546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.1451999992132187,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.12319999933242798,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.6168000102043152},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5952000021934509},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5562000274658203},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44279998540878296},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4375999867916107},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.40119999647140503},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3977999985218048},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.38960000872612},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.3677999973297119}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7728000283241272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6901000142097473},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.6168000102043152},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5952000021934509},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5562000274658203},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5379999876022339},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44279998540878296},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4375999867916107},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3977999985218048},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.3677999973297119},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3479999899864197},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.32919999957084656},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.30410000681877136},{"id":"https://openalex.org/C35525427","wikidata":"https://www.wikidata.org/wiki/Q745881","display_name":"Intrusion detection system","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30079999566078186},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2849000096321106},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3671538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3671538","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7617606552","display_name":null,"funder_award_id":"62371013","funder_id":"https://openalex.org/F4320327720","funder_display_name":"Foundation for Innovative Research Groups of the National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320327720","display_name":"Foundation for Innovative Research Groups of the National Natural Science Foundation of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"3D":[1,81],"detection":[2,33,120,129,182],"is":[3,23],"critical":[4],"for":[5,25,187],"autonomous":[6,188],"driving":[7,189],"reliability.":[8],"While":[9],"most":[10],"existing":[11,29],"methods":[12,34],"boost":[13],"accuracy":[14,92,130,183],"via":[15],"elaborate":[16],"networks,":[17],"they":[18,40],"neglect":[19],"inference":[20,163,185],"speed":[21,46,164],"which":[22],"essential":[24],"real-world":[26],"deployment.":[27],"Although":[28],"decoder-based":[30],"sparse":[31],"query":[32],"offer":[35],"advantages":[36],"in":[37,44],"real-time":[38],"performance,":[39],"suffer":[41],"from":[42,165],"limitations":[43],"convergence":[45,57,95],"and":[47,58,93,134,154,158,184],"cross-modal":[48],"feature":[49,60],"integration.":[50],"To":[51],"address":[52],"these":[53],"challenges":[54],"of":[55,131],"slow":[56],"inadequate":[59],"fusion,":[61],"this":[62],"paper":[63],"proposes":[64],"a":[65,108,128,173],"Prior-Guided":[66],"Position":[67],"Embedding":[68],"Module":[69,111],"based":[70],"on":[71,137],"the":[72,138,143,148],"Cross":[73],"Modal":[74],"Transformer":[75],"(CMT)":[76],"framework.":[77],"The":[78,170],"module":[79],"reconstructs":[80],"sampling":[82],"point":[83],"distribution":[84],"through":[85],"spatial":[86],"geometric":[87],"priors,":[88],"effectively":[89],"improving":[90],"model":[91,171],"accelerating":[94],"without":[96],"incurring":[97],"additional":[98],"computational":[99],"overhead.":[100],"Concurrently,":[101],"to":[102,117,147,167],"enhance":[103],"motion":[104],"awareness,":[105],"we":[106],"integrate":[107],"Temporal":[109],"Fusion":[110],"that":[112,125],"leverages":[113],"historical":[114],"frame":[115],"information":[116],"optimize":[118],"current":[119],"performance.":[121],"Experimental":[122],"results":[123],"demonstrate":[124],"StreamCMT":[126],"achieves":[127],"72.5%":[132],"NDS":[133,153],"69.6%":[135],"mAP":[136,155],"nuScenes":[139],"test":[140],"set.":[141],"On":[142],"validation":[144],"set,":[145],"compared":[146],"baseline":[149],"model,":[150],"it":[151],"improves":[152],"by":[156],"1.0%":[157],"1.1%":[159],"respectively,":[160],"while":[161,176],"increasing":[162],"12.0":[166],"14.4":[168],"FPS.":[169],"maintains":[172],"lightweight":[174],"architecture":[175],"achieving":[177],"an":[178],"effective":[179],"trade-off":[180],"between":[181],"efficiency":[186],"perception":[190],"systems.":[191]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-11T00:00:00"}
