{"id":"https://openalex.org/W7114927851","doi":"https://doi.org/10.1109/lsp.2025.3643354","title":"Explicit-Implicit Prompt Injection and Semantic-Guided Latent LoRA for Vision-Language Tracking","display_name":"Explicit-Implicit Prompt Injection and Semantic-Guided Latent LoRA for Vision-Language Tracking","publication_year":2025,"publication_date":"2025-12-12","ids":{"openalex":"https://openalex.org/W7114927851","doi":"https://doi.org/10.1109/lsp.2025.3643354"},"language":null,"primary_location":{"id":"doi:10.1109/lsp.2025.3643354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3643354","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jiapeng Zhang","orcid":"https://orcid.org/0009-0007-3728-7577"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiapeng Zhang","raw_affiliation_strings":["College of Information Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0009-0007-3728-7577","affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ying Wei","orcid":"https://orcid.org/0000-0003-0915-5378"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wei","raw_affiliation_strings":["College of Information Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0003-0915-5378","affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yongfeng Li","orcid":"https://orcid.org/0009-0006-8829-9992"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongfeng Li","raw_affiliation_strings":["College of Information Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0009-0006-8829-9992","affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gang Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Yang","raw_affiliation_strings":["College of Information Science and Engineering, Northeastern University, Shenyang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"last","author":{"id":null,"display_name":"Qiaohong Hao","orcid":"https://orcid.org/0000-0003-0072-5936"},"institutions":[{"id":"https://openalex.org/I4210114043","display_name":"Henan Institute of Technology","ror":"https://ror.org/024f5m737","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210114043"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaohong Hao","raw_affiliation_strings":["College of Intelligent Engineering, Henan Institute of Technology, Henan, China"],"raw_orcid":"https://orcid.org/0000-0003-0072-5936","affiliations":[{"raw_affiliation_string":"College of Intelligent Engineering, Henan Institute of Technology, Henan, China","institution_ids":["https://openalex.org/I4210114043"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.55428526,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"33","issue":null,"first_page":"376","last_page":"380"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.004600000102072954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5350000262260437},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.46149998903274536},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.43470001220703125},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.4334000051021576},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4189000129699707},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3758000135421753},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.3513999879360199},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3425000011920929},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.3328999876976013}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7850000262260437},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5317999720573425},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.46149998903274536},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4413999915122986},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.4334000051021576},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4189000129699707},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C2777379011","wikidata":"https://www.wikidata.org/wiki/Q938545","display_name":"Implicit learning","level":3,"score":0.3240000009536743},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.3197999894618988},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2937000095844269},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.28949999809265137},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2766999900341034},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2025.3643354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3643354","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4585823974","display_name":null,"funder_award_id":"62441231","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2891033863","https://openalex.org/W2963109634","https://openalex.org/W3181069167","https://openalex.org/W4206662040","https://openalex.org/W4292828074","https://openalex.org/W4312751983","https://openalex.org/W4313307856","https://openalex.org/W4321766229","https://openalex.org/W4386075643","https://openalex.org/W4387757618","https://openalex.org/W4387969322","https://openalex.org/W4390871485","https://openalex.org/W4391759558","https://openalex.org/W4393159404","https://openalex.org/W4396505919","https://openalex.org/W4399852316","https://openalex.org/W4402753915","https://openalex.org/W4402754150","https://openalex.org/W4402951596","https://openalex.org/W4402981768","https://openalex.org/W4405844469","https://openalex.org/W4408930345","https://openalex.org/W4409098757","https://openalex.org/W4409257272"],"related_works":[],"abstract_inverted_index":{"Prompt-based":[0],"learning":[1],"has":[2],"shown":[3],"promise":[4],"in":[5,31,114],"visual-language":[6],"tracking":[7],"(VLT),":[8],"yet":[9],"existing":[10],"methods":[11],"often":[12],"rely":[13],"on":[14,36,122],"either":[15],"explicit":[16,78],"or":[17],"implicit":[18,93],"prompting":[19],"alone,":[20],"limiting":[21],"fine-grained":[22],"cross-modal":[23,139],"alignment.":[24],"Moreover,":[25],"Low-Rank":[26],"Adaptation":[27],"(LoRA)":[28],"-based":[29],"fine-tuning":[30],"prior":[32,147],"work":[33],"typically":[34],"focuses":[35],"visual-only":[37],"adaptation,":[38],"overlooking":[39],"language":[40],"semantics.":[41],"To":[42],"address":[43],"these":[44],"issues,":[45],"we":[46],"propose":[47],"a":[48,101,134],"unified":[49],"VLT":[50],"framework":[51],"that":[52],"integrates":[53],"Explicit-Implicit":[54],"Prompt":[55],"Injection":[56],"(EIPI)":[57],"and":[58,70],"Semantic-Guided":[59],"Latent":[60],"LoRA":[61,109],"(SGLL).":[62],"EIPI":[63],"introduces":[64],"semantic":[65],"prompts":[66,79,94],"to":[67],"facilitate":[68],"robust":[69],"context-sensitive":[71],"target":[72,86],"modeling":[73],"through":[74],"two":[75],"pathways.":[76],"The":[77],"are":[80,95],"constructed":[81],"by":[82,110],"interact":[83],"between":[84],"multi-modal":[85],"representations":[87],"with":[88,137],"the":[89,115,127],"search":[90],"region,":[91],"while":[92,150],"learned":[96],"from":[97],"linguistic":[98],"features":[99],"via":[100],"lightweight":[102],"bottleneck":[103],"network.":[104],"Then,":[105],"SGLL":[106],"extends":[107],"standard":[108],"introducing":[111],"learnable":[112],"queries":[113],"latent":[116],"space,":[117],"allowing":[118],"residual":[119],"modulation":[120],"based":[121],"language-visual":[123],"semantics":[124],"without":[125],"retraining":[126],"full":[128],"model.":[129],"This":[130],"dual":[131],"design":[132],"yields":[133],"parameter-efficient":[135],"tracker":[136],"strong":[138],"adaptability.":[140],"Extensive":[141],"experiments":[142],"show":[143],"our":[144],"method":[145],"outperforms":[146],"prompt-based":[148],"approaches":[149],"maintaining":[151],"high":[152],"efficiency.":[153]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-12T00:00:00"}
