{"id":"https://openalex.org/W4402780123","doi":"https://doi.org/10.1109/cvpr52733.2024.01568","title":"OVMR: Open-Vocabulary Recognition with Multi-Modal References","display_name":"OVMR: Open-Vocabulary Recognition with Multi-Modal References","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4402780123","doi":"https://doi.org/10.1109/cvpr52733.2024.01568"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52733.2024.01568","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.01568","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023181336","display_name":"Zehong Ma","orcid":"https://orcid.org/0009-0005-1533-2651"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Zehong Ma","raw_affiliation_strings":["School of Computer Science, Peking University,National Key Laboratory for Multimedia Information Processing"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Peking University,National Key Laboratory for Multimedia Information Processing","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101777591","display_name":"Shiliang Zhang","orcid":"https://orcid.org/0000-0002-9524-1602"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Shiliang Zhang","raw_affiliation_strings":["School of Computer Science, Peking University,National Key Laboratory for Multimedia Information Processing"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Peking University,National Key Laboratory for Multimedia Information Processing","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050660610","display_name":"Longhui Wei","orcid":"https://orcid.org/0000-0001-6916-3009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Longhui Wei","raw_affiliation_strings":["Huawei Inc"],"affiliations":[{"raw_affiliation_string":"Huawei Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111504451","display_name":"Qi Tian","orcid":"https://orcid.org/0009-0003-2676-5300"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi Tian","raw_affiliation_strings":["Huawei Inc"],"affiliations":[{"raw_affiliation_string":"Huawei Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023181336"],"corresponding_institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.1822,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.89461265,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"16571","last_page":"16581"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9230999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7148256301879883},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6617516875267029},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6544376611709595},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5066297650337219},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4327229857444763},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4029366672039032},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.36571812629699707},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06254991888999939}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7148256301879883},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6617516875267029},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6544376611709595},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5066297650337219},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4327229857444763},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4029366672039032},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.36571812629699707},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06254991888999939},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52733.2024.01568","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.01568","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.75,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W3167549738","https://openalex.org/W2381983017"],"abstract_inverted_index":{"The":[0,141],"challenge":[1],"of":[2,12,79,134,166],"open-vocabulary":[3,67],"recognition":[4,68],"lies":[5],"in":[6],"the":[7,30,50,129,157,163],"model":[8],"has":[9],"no":[10],"clue":[11],"new":[13],"categories":[14],"it":[15,169],"is":[16,46,104,119,144],"applied":[17,121],"to.":[18],"Existing":[19],"works":[20,149],"have":[21,161],"proposed":[22,142],"different":[23,71],"methods":[24,172],"to":[25,42,60,75,93,122,131],"embed":[26],"category":[27,37,98],"cues":[28,99],"into":[29],"model,":[31],"e.g.,":[32,168],"through":[33],"few-shot":[34],"fine-tuning,":[35],"providing":[36],"names":[38],"or":[39,138],"textual":[40,80,110,139],"descriptions":[41,54,81,111],"Vision-Language":[43],"Models.":[44],"Fine-tuning":[45],"time-consuming":[47],"and":[48,58,82,125,148,176],"degrades":[49],"generalization":[51],"capability.":[52],"Textual":[53],"could":[55],"be":[56],"ambiguous":[57],"fail":[59],"depict":[61],"visual":[62],"details.":[63],"This":[64],"paper":[65],"tackles":[66],"from":[69,156],"a":[70,95,145],"perspective":[72],"by":[73,107],"referring":[74],"multi-modal":[76,102,126],"clues":[77],"composed":[78],"exemplar":[83,136,152],"images.":[84],"Our":[85],"method,":[86],"named":[87],"OVMR,":[88,167],"adopts":[89],"two":[90],"innovative":[91],"components":[92],"pursue":[94],"more":[96],"robust":[97],"embedding.":[100],"A":[101,115],"classifier":[103],"first":[105],"generated":[106],"dynamically":[108],"complementing":[109],"with":[112,128,151],"image":[113],"exemplars.":[114],"preference-based":[116],"refinement":[117],"module":[118],"hence":[120],"fuse":[123],"uni-modal":[124],"classifiers,":[127],"aim":[130],"alleviate":[132],"issues":[133],"low-quality":[135],"images":[137,153],"descriptions.":[140],"OVMR":[143],"plug-and-play":[146],"module,":[147],"well":[150],"randomly":[154],"crawled":[155],"Internet.":[158],"Extensive":[159],"experiments":[160],"demonstrated":[162],"promising":[164],"performance":[165],"outperforms":[170],"existing":[171],"across":[173],"various":[174],"scenarios":[175],"setups.":[177],"Codes":[178],"are":[179],"publicly":[180],"available":[181],"at":[182],"https://github.com/Zehong-Ma/OVMR.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
