{"id":"https://openalex.org/W4405033500","doi":"https://doi.org/10.1109/iccv51701.2025.00210","title":"Visual Modality Prompt for Adapting Vision-Language Object Detectors","display_name":"Visual Modality Prompt for Adapting Vision-Language Object Detectors","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4405033500","doi":"https://doi.org/10.1109/iccv51701.2025.00210"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.00622","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044936138","display_name":"Heitor R. Medeiros","orcid":"https://orcid.org/0000-0002-1300-3337"},"institutions":[{"id":"https://openalex.org/I4210162773","display_name":"Living Systems (United States)","ror":"https://ror.org/05sft7551","country_code":"US","type":"company","lineage":["https://openalex.org/I4210162773"]},{"id":"https://openalex.org/I9736820","display_name":"\u00c9cole de Technologie Sup\u00e9rieure","ror":"https://ror.org/0020snb74","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120","https://openalex.org/I9736820"]}],"countries":["CA","US"],"is_corresponding":true,"raw_author_name":"Heitor R. Medeiros","raw_affiliation_strings":["LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering","institution_ids":["https://openalex.org/I9736820","https://openalex.org/I4210162773"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026179240","display_name":"Atif Belal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162773","display_name":"Living Systems (United States)","ror":"https://ror.org/05sft7551","country_code":"US","type":"company","lineage":["https://openalex.org/I4210162773"]},{"id":"https://openalex.org/I9736820","display_name":"\u00c9cole de Technologie Sup\u00e9rieure","ror":"https://ror.org/0020snb74","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120","https://openalex.org/I9736820"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Atif Belal","raw_affiliation_strings":["LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering","institution_ids":["https://openalex.org/I9736820","https://openalex.org/I4210162773"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Srikanth Muralidharan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162773","display_name":"Living Systems (United States)","ror":"https://ror.org/05sft7551","country_code":"US","type":"company","lineage":["https://openalex.org/I4210162773"]},{"id":"https://openalex.org/I9736820","display_name":"\u00c9cole de Technologie Sup\u00e9rieure","ror":"https://ror.org/0020snb74","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120","https://openalex.org/I9736820"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Srikanth Muralidharan","raw_affiliation_strings":["LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering","institution_ids":["https://openalex.org/I9736820","https://openalex.org/I4210162773"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006937759","display_name":"\u00c9ric Granger","orcid":"https://orcid.org/0000-0001-6116-7945"},"institutions":[{"id":"https://openalex.org/I4210162773","display_name":"Living Systems (United States)","ror":"https://ror.org/05sft7551","country_code":"US","type":"company","lineage":["https://openalex.org/I4210162773"]},{"id":"https://openalex.org/I9736820","display_name":"\u00c9cole de Technologie Sup\u00e9rieure","ror":"https://ror.org/0020snb74","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120","https://openalex.org/I9736820"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Eric Granger","raw_affiliation_strings":["LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering","institution_ids":["https://openalex.org/I9736820","https://openalex.org/I4210162773"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039660800","display_name":"Marco Pedersoli","orcid":"https://orcid.org/0000-0002-7601-8640"},"institutions":[{"id":"https://openalex.org/I4210162773","display_name":"Living Systems (United States)","ror":"https://ror.org/05sft7551","country_code":"US","type":"company","lineage":["https://openalex.org/I4210162773"]},{"id":"https://openalex.org/I9736820","display_name":"\u00c9cole de Technologie Sup\u00e9rieure","ror":"https://ror.org/0020snb74","country_code":"CA","type":"education","lineage":["https://openalex.org/I49663120","https://openalex.org/I9736820"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Marco Pedersoli","raw_affiliation_strings":["LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIVIA, ETS Montreal, Canada International Laboratory on Learning Systems (ILLS),Dept. of Systems Engineering","institution_ids":["https://openalex.org/I9736820","https://openalex.org/I4210162773"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044936138"],"corresponding_institution_ids":["https://openalex.org/I4210162773","https://openalex.org/I9736820"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00097437,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2172","last_page":"2182"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.942300021648407,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.8204841613769531},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.520520806312561},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5082082748413086},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4753696322441101},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4047990143299103},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3428955078125}],"concepts":[{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.8204841613769531},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.520520806312561},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5082082748413086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4753696322441101},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4047990143299103},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3428955078125}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.00622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.00622","pdf_url":"https://arxiv.org/pdf/2412.00622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.00622","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.00622","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.00622","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.00622","pdf_url":"https://arxiv.org/pdf/2412.00622","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322675","display_name":"Mitacs","ror":"https://ror.org/00cjrc276"},{"id":"https://openalex.org/F4320331257","display_name":"Alliance de recherche num\u00e9rique du Canada","ror":"https://ror.org/010r6td27"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0,80],"zero-shot":[1,57,75,124,188],"performance":[2,179],"of":[3,77,140],"object":[4],"detectors":[5,27,118],"degrades":[6],"when":[7],"tested":[8],"on":[9,160,169],"different":[10],"modalities,":[11,30],"such":[12,48],"as":[13,49],"infrared":[14,171],"and":[15,39,51,165,168,174],"depth.":[16],"While":[17],"recent":[18],"work":[19],"has":[20],"explored":[21],"image":[22],"translation":[23,96],"techniques":[24],"to":[25,28,35,42,97,115,119,181],"adapt":[26,116],"new":[29,120],"these":[31,106],"methods":[32],"are":[33],"limited":[34],"a":[36,111,147],"single":[37],"modality":[38,142,158],"apply":[40,91],"only":[41],"traditional":[43],"detectors.":[44,79],"Recently,":[45],"vision-language":[46,89,117,162],"detectors,":[47,163],"YOLO-World":[50,164],"Grounding":[52,166],"DINO,":[53,167],"have":[54,61],"shown":[55],"promising":[56],"capabilities,":[58],"however,":[59],"they":[60],"not":[62],"yet":[63],"been":[64],"adapted":[65],"for":[66,86,157],"other":[67],"visual":[68,81,112,130],"modalities.":[69],"Traditional":[70],"fine-tuning":[71,183],"approaches":[72],"compromise":[73],"the":[74,78,92,138,186],"capabilities":[76],"prompt":[82,95,113,131,143],"strategies":[83],"commonly":[84],"used":[85],"classification":[87],"with":[88],"models":[90],"same":[93],"linear":[94],"each":[98],"image,":[99],"making":[100],"them":[101],"less":[102],"effective.":[103],"To":[104],"address":[105],"limitations,":[107],"we":[108],"propose":[109],"ModPrompt,":[110],"strategy":[114,132],"modalities":[121],"without":[122],"degrading":[123],"performance.":[125],"In":[126],"particular,":[127],"an":[128],"encoder-decoder":[129],"is":[133],"proposed,":[134],"further":[135],"enhanced":[136],"by":[137],"integration":[139],"inference-friendly":[141],"decoupled":[144],"residual,":[145],"facilitating":[146],"more":[148],"robust":[149],"adaptation.":[150],"Empirical":[151],"benchmarking":[152],"results":[153],"show":[154],"our":[155],"method":[156],"adaptation":[159],"two":[161],"challenging":[170],"(LLVIP,":[172],"FLIR)":[173],"depth":[175],"(NYUv2)":[176],"datasets,":[177],"achieving":[178],"comparable":[180],"full":[182],"while":[184],"preserving":[185],"model's":[187],"capability.":[189],"Code":[190],"available":[191],"at:":[192],"https://github.com/heitorrapela/ModPrompt.":[193]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
