{"id":"https://openalex.org/W7164825710","doi":"https://doi.org/10.1145/3805622.3810875","title":"Robust Exemplar Prompt Learning via Bi-directional Visual-Semantic Alignment for Multi-Object Tracking","display_name":"Robust Exemplar Prompt Learning via Bi-directional Visual-Semantic Alignment for Multi-Object Tracking","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164825710","doi":"https://doi.org/10.1145/3805622.3810875"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810875","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810875","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810875","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138629280","display_name":"Lingyan Liang","orcid":"https://orcid.org/0009-0002-3648-0678"},"institutions":[{"id":"https://openalex.org/I4210113342","display_name":"Systems Engineering Society of China","ror":"https://ror.org/024pse488","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210113342"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingyan Liang","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd, China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-3648-0678","affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd, China, Beijing, China","institution_ids":["https://openalex.org/I4210113342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021852883","display_name":"Z. Zhang","orcid":"https://orcid.org/0000-0001-6661-1371"},"institutions":[{"id":"https://openalex.org/I132369690","display_name":"Tianjin University of Science and Technology","ror":"https://ror.org/018rbtf37","country_code":"CN","type":"education","lineage":["https://openalex.org/I132369690"]},{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibin Zhang","raw_affiliation_strings":["Tianjin University of Technology, Tianjin, China, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0001-6661-1371","affiliations":[{"raw_affiliation_string":"Tianjin University of Technology, Tianjin, China, Tianjin, China","institution_ids":["https://openalex.org/I136765683","https://openalex.org/I132369690"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102031951","display_name":"Gang Nathan Dong","orcid":"https://orcid.org/0000-0002-3147-1011"},"institutions":[{"id":"https://openalex.org/I4210113342","display_name":"Systems Engineering Society of China","ror":"https://ror.org/024pse488","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210113342"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Dong","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd, China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3147-1011","affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd, China, Beijing, China","institution_ids":["https://openalex.org/I4210113342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013433372","display_name":"Dongchao Wen","orcid":"https://orcid.org/0000-0001-7311-1842"},"institutions":[{"id":"https://openalex.org/I4210113342","display_name":"Systems Engineering Society of China","ror":"https://ror.org/024pse488","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210113342"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongchao Wen","raw_affiliation_strings":["IEIT SYSTEMS Co., Ltd, China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7311-1842","affiliations":[{"raw_affiliation_string":"IEIT SYSTEMS Co., Ltd, China, Beijing, China","institution_ids":["https://openalex.org/I4210113342"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075412755","display_name":"Kaihua Zhang","orcid":"https://orcid.org/0000-0002-1613-3401"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaihua Zhang","raw_affiliation_strings":["Southeast University, Nanjing, China, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-1613-3401","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93599032,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1553","last_page":"1557"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8652999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8652999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03920000046491623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.026399999856948853,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7328000068664551},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6973000168800354},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4253000020980835},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.3919000029563904},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.32269999384880066},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.32120001316070557},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.31369999051094055}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.791100025177002},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7328000068664551},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6973000168800354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5942999720573425},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4253000020980835},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36719998717308044},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.32120001316070557},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3165999948978424},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29179999232292175},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2728999853134155},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810875","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810875","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810875","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810875","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2840396449","display_name":null,"funder_award_id":"62276141","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2193145675","https://openalex.org/W2214352687","https://openalex.org/W2252355370","https://openalex.org/W2511791013","https://openalex.org/W2603203130","https://openalex.org/W2953920664","https://openalex.org/W3009340385","https://openalex.org/W3086436251","https://openalex.org/W3096609285","https://openalex.org/W3206120323","https://openalex.org/W4281259653","https://openalex.org/W4286904999","https://openalex.org/W4292828369","https://openalex.org/W4310467366","https://openalex.org/W4312473433","https://openalex.org/W4312619242","https://openalex.org/W4313117614","https://openalex.org/W4382240009","https://openalex.org/W4385338973","https://openalex.org/W4390872615","https://openalex.org/W4394596409","https://openalex.org/W4400667732","https://openalex.org/W4402952501","https://openalex.org/W4404001095"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"multi-object":[1],"tracking":[2],"(MOT)":[3],"approaches":[4],"increasingly":[5],"leverage":[6],"pre-trained":[7],"CLIP":[8],"models":[9],"to":[10,27,81,102,138,156],"boost":[11],"cross-domain":[12,178],"generalization.":[13,42],"A":[14],"common":[15],"strategy":[16],"uses":[17],"a":[18,50,97,131,143],"predefined":[19],"TrackBook\u2014a":[20],"closed-set":[21],"of":[22,30,121],"visual":[23,115,127],"concepts\u2014as":[24],"textual":[25,66,84,113,154],"prompts":[26,36,67,85,110],"guide":[28],"learning":[29],"domain-invariant":[31],"representations.":[32],"However,":[33],"these":[34],"fixed":[35],"lack":[37],"adaptive":[38],"context,":[39],"causing":[40],"limited":[41],"To":[43],"address":[44],"this":[45,47],"limitation,":[46],"paper":[48],"introduces":[49],"robust":[51],"Exemplar":[52],"Prompt":[53],"Learning":[54],"(EPL)":[55],"framework":[56],"via":[57],"Bi-directional":[58],"Visual-Semantic":[59],"Alignment":[60],"(BiVSA),":[61],"termed":[62],"EPL-MOT,":[63],"which":[64,151],"augments":[65],"with":[68,86,125],"instance-aware":[69],"contextual":[70,87],"information":[71,155],"derived":[72],"during":[73],"tracking.":[74],"Specifically,":[75],"an":[76],"EPL":[77],"module":[78,99],"is":[79,100,149],"designed":[80],"dynamically":[82],"enrich":[83],"cues,":[88],"enabling":[89],"instance-specific":[90],"adaptation":[91],"without":[92],"inducing":[93],"category":[94],"shift.":[95],"Furthermore,":[96],"BiVSA":[98],"proposed":[101,169],"deepen":[103],"cross-modal":[104],"interaction":[105],"by":[106],"incorporating":[107],"bidirectional":[108],"learnable":[109],"into":[111],"both":[112,175],"and":[114,164,177],"branches.":[116],"This":[117],"facilitates":[118],"progressive":[119],"integration":[120],"global":[122],"semantic":[123],"features":[124],"local":[126],"structures,":[128],"resulting":[129],"in":[130],"more":[132],"effectively":[133],"aligned":[134],"visual-semantic":[135],"space.":[136],"Finally,":[137],"enhance":[139],"robustness":[140],"against":[141],"distractors,":[142],"Category-guided":[144],"Detection":[145],"Query":[146],"Generator":[147],"(CDQG)":[148],"constructed,":[150],"incorporates":[152],"base-class":[153],"suppress":[157],"irrelevant":[158],"targets.":[159],"Comprehensive":[160],"evaluations":[161],"on":[162],"MOT17":[163],"MOT20":[165],"demonstrate":[166],"that":[167],"the":[168],"EPL-MOT":[170],"achieves":[171],"competitive":[172],"performance":[173],"across":[174],"in-domain":[176],"settings.":[179]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
