{"id":"https://openalex.org/W4404391421","doi":"https://doi.org/10.48550/arxiv.2411.06780","title":"SynCL: A Synergistic Training Strategy with Instance-Aware Contrastive Learning for End-to-End Multi-Camera 3D Tracking","display_name":"SynCL: A Synergistic Training Strategy with Instance-Aware Contrastive Learning for End-to-End Multi-Camera 3D Tracking","publication_year":2024,"publication_date":"2024-11-11","ids":{"openalex":"https://openalex.org/W4404391421","doi":"https://doi.org/10.48550/arxiv.2411.06780"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.06780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.06780","pdf_url":"https://arxiv.org/pdf/2411.06780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.06780","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102651649","display_name":"Shubo Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lin, Shubo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069069128","display_name":"Yutong Kou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kou, Yutong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wu, Zirui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zirui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Shaoru","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shaoru","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109346446","display_name":"Bing Li","orcid":"https://orcid.org/0000-0002-5888-6735"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079217067","display_name":"Weiming Hu","orcid":"https://orcid.org/0000-0003-4501-1435"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Weiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101436628","display_name":"Jin Gao","orcid":"https://orcid.org/0000-0002-8925-5215"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Jin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102651649"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.7195531725883484},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6051848530769348},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.6038122773170471},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5417299270629883},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5147738456726074},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.44720426201820374},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1237947940826416}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.7195531725883484},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6051848530769348},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.6038122773170471},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5417299270629883},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5147738456726074},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.44720426201820374},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1237947940826416},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.06780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.06780","pdf_url":"https://arxiv.org/pdf/2411.06780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.06780","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.06780","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.06780","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.06780","pdf_url":"https://arxiv.org/pdf/2411.06780","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404391421.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"While":[0],"existing":[1],"query-based":[2],"3D":[3],"end-to-end":[4],"visual":[5],"trackers":[6],"integrate":[7],"detection":[8,97,177],"and":[9,48,98,178,191,203],"tracking":[10],"via":[11],"the":[12,24,40,57,67,115,130,139,164,174,199],"tracking-by-attention":[13],"paradigm,":[14],"these":[15,31,80],"two":[16,36],"chicken-and-egg":[17],"tasks":[18],"encounter":[19],"optimization":[20],"difficulties":[21,32],"when":[22],"sharing":[23],"same":[25],"parameters.":[26],"Our":[27],"findings":[28],"reveal":[29],"that":[30,113],"arise":[33],"due":[34],"to":[35,72,92,124,161],"inherent":[37],"constraints":[38],"on":[39,79,198],"self-attention":[41,58,131],"mechanism,":[42],"i.e.,":[43],"over-deduplication":[44],"for":[45,51,96,108,138,169],"object":[46,122],"queries":[47,119,123],"self-centric":[49,167],"attention":[50,168],"track":[52,118,170],"queries.":[53],"In":[54],"contrast,":[55],"removing":[56],"mechanism":[59],"not":[60],"only":[61],"minimally":[62],"impacts":[63],"regression":[64],"predictions":[65],"of":[66,117,166],"tracker,":[68],"but":[69],"also":[70,143],"tends":[71],"generate":[73],"more":[74],"latent":[75],"candidate":[76],"boxes.":[77],"Based":[78],"analyses,":[81],"we":[82,101,142,156],"present":[83],"SynCL,":[84],"a":[85,103,109,145],"novel":[86],"plug-and-play":[87],"synergistic":[88],"training":[89,153],"strategy":[90],"designed":[91],"co-facilitate":[93],"multi-task":[94],"learning":[95],"tracking.":[99,179],"Specifically,":[100],"propose":[102],"Task-specific":[104],"Hybrid":[105],"Matching":[106],"module":[107,149],"weight-shared":[110],"cross-attention-based":[111],"decoder":[112],"matches":[114],"targets":[116],"with":[120,195],"multiple":[121],"exploit":[125],"promising":[126],"candidates":[127,137],"overlooked":[128],"by":[129,151],"mechanism.":[132],"To":[133],"flexibly":[134],"select":[135],"optimal":[136],"one-to-many":[140],"matching,":[141],"design":[144],"Dynamic":[146],"Query":[147],"Filtering":[148],"controlled":[150],"model":[152],"status.":[154],"Moreover,":[155],"introduce":[157],"Instance-aware":[158],"Contrastive":[159],"Learning":[160],"break":[162],"through":[163],"barrier":[165],"queries,":[171],"effectively":[172],"bridging":[173],"gap":[175],"between":[176],"Without":[180],"additional":[181],"inference":[182],"costs,":[183],"SynCL":[184],"consistently":[185],"delivers":[186],"improvements":[187],"in":[188],"various":[189],"benchmarks":[190],"achieves":[192],"state-of-the-art":[193],"performance":[194],"$58.9\\%$":[196],"AMOTA":[197],"nuScenes":[200],"dataset.":[201],"Code":[202],"raw":[204],"results":[205],"will":[206],"be":[207],"publicly":[208],"available.":[209]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2024-11-15T00:00:00"}
