{"id":"https://openalex.org/W7134843045","doi":"https://doi.org/10.48550/arxiv.2603.08199","title":"Fusion-Poly: A Polyhedral Framework Based on Spatial-Temporal Fusion for 3D Multi-Object Tracking","display_name":"Fusion-Poly: A Polyhedral Framework Based on Spatial-Temporal Fusion for 3D Multi-Object Tracking","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134843045","doi":"https://doi.org/10.48550/arxiv.2603.08199"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08199","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128640895","display_name":"Xian Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Xian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128633059","display_name":"Yitao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yitao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128651563","display_name":"Xiaoyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001759524","display_name":"Zijia Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zijia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128634305","display_name":"Lijun Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Lijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128638652","display_name":"Lining Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Lining","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5128640895"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.4918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.4918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.14749999344348907,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.10130000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timestamp","display_name":"Timestamp","score":0.8453999757766724},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.7269999980926514},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.7164999842643738},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.46070000529289246},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4350000023841858},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4108999967575073},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.3799999952316284},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.3799000084400177},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3781999945640564}],"concepts":[{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.8453999757766724},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7900999784469604},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.7269999980926514},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.7164999842643738},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5539000034332275},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5188000202178955},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.46070000529289246},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4350000023841858},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.414000004529953},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.3799000084400177},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3781999945640564},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35499998927116394},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3221000134944916},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.2955999970436096},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2671999931335449},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.257999986410141}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08199","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08199","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08199","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08199","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LiDAR-camera":[0],"3D":[1,106,215],"multi-object":[2],"tracking":[3,18],"(MOT)":[4],"combines":[5],"rich":[6],"visual":[7],"semantics":[8],"with":[9,118,125],"accurate":[10],"depth":[11],"cues":[12],"to":[13,55,80,152,158],"improve":[14],"trajectory":[15,88,164],"consistency":[16,189],"and":[17,24,43,85,112,124,136,154,176,180],"reliability.":[19],"In":[20],"practice,":[21],"however,":[22],"LiDAR":[23,111],"cameras":[25],"operate":[26],"at":[27,46,60,121,128,190],"different":[28],"sampling":[29],"rates.":[30],"To":[31,94],"maintain":[32],"temporal":[33,92],"alignment,":[34],"existing":[35],"data":[36],"pipelines":[37],"usually":[38],"synchronize":[39],"heterogeneous":[40],"sensor":[41],"streams":[42],"annotate":[44],"them":[45],"a":[47,70,101,145,162,181,207],"reduced":[48],"shared":[49],"frequency,":[50],"forcing":[51],"most":[52],"prior":[53],"methods":[54],"perform":[56],"spatial":[57],"fusion":[58,103],"only":[59],"synchronized":[61,122,153,191],"timestamps":[62,123,192],"through":[63,170],"projection-based":[64],"or":[65],"learnable":[66],"cross-sensor":[67],"association.":[68],"As":[69],"result,":[71],"abundant":[72],"asynchronous":[73,110,129,155],"observations":[74,120,127],"remain":[75],"underexploited,":[76],"despite":[77],"their":[78],"potential":[79],"support":[81],"more":[82,86],"frequent":[83],"association":[84],"robust":[87],"estimation":[89,165],"over":[90],"short":[91],"intervals.":[93],"address":[95],"this":[96],"limitation,":[97],"we":[98],"propose":[99],"Fusion-Poly,":[100],"spatial-temporal":[102],"framework":[104,140],"for":[105],"MOT":[107,216],"that":[108,150,167,186],"integrates":[109],"camera":[113],"data.":[114],"Fusion-Poly":[115,202],"associates":[116],"trajectories":[117,169],"multi-modal":[119],"single-modal":[126],"timestamps,":[130],"enabling":[131],"higher-frequency":[132],"updates":[133],"of":[134,210,225],"motion":[135,172],"existence":[137],"states.":[138],"The":[139],"contains":[141],"three":[142],"key":[143],"components:":[144],"frequency-aware":[146,163],"cascade":[147],"matching":[148],"module":[149,166,185],"adapts":[151],"frames":[156],"according":[157],"available":[159],"detection":[160],"modalities;":[161],"maintains":[168],"high-frequency":[171],"prediction,":[173],"differential":[174],"updates,":[175],"confidence-calibrated":[177],"lifecycle":[178],"management;":[179],"full-state":[182],"observation":[183],"alignment":[184],"improves":[187],"cross-modal":[188],"by":[193],"optimizing":[194],"image-projection":[195],"errors.":[196],"On":[197],"the":[198,211,223],"nuScenes":[199],"test":[200],"set,":[201],"achieves":[203],"76.5%":[204],"AMOTA,":[205],"establishing":[206],"new":[208],"state":[209],"art":[212],"among":[213],"tracking-by-detection":[214],"methods.":[217],"Extensive":[218],"ablation":[219],"studies":[220],"further":[221],"validate":[222],"effectiveness":[224],"each":[226],"component.":[227],"Code":[228],"will":[229],"be":[230],"released.":[231]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
