{"id":"https://openalex.org/W4406860151","doi":"https://doi.org/10.1109/vcip63160.2024.10849818","title":"IoU-CLIP: IoU-Aware Language-Image Model Tuning for Open Vocabulary Object Detection","display_name":"IoU-CLIP: IoU-Aware Language-Image Model Tuning for Open Vocabulary Object Detection","publication_year":2024,"publication_date":"2024-12-08","ids":{"openalex":"https://openalex.org/W4406860151","doi":"https://doi.org/10.1109/vcip63160.2024.10849818"},"language":"en","primary_location":{"id":"doi:10.1109/vcip63160.2024.10849818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip63160.2024.10849818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102878278","display_name":"Mingzhou He","orcid":"https://orcid.org/0009-0007-6555-6973"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingzhou He","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075737786","display_name":"Qingbo Wu","orcid":"https://orcid.org/0000-0003-2936-6340"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingbo Wu","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062332580","display_name":"King Ngi Ngan","orcid":"https://orcid.org/0000-0003-1946-3235"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"King Ngi Ngan","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046871364","display_name":"Yiming Xiao","orcid":"https://orcid.org/0000-0002-0962-3525"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Xiao","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100617043","display_name":"Fanman Meng","orcid":"https://orcid.org/0000-0002-3016-2567"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fanman Meng","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066434602","display_name":"Heqian Qiu","orcid":"https://orcid.org/0000-0002-0963-0311"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heqian Qiu","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114378292","display_name":"Hongliang Li","orcid":"https://orcid.org/0000-0002-7481-095X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongliang Li","raw_affiliation_strings":["University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,School of Information and Communication Engineering,Chengdu,China,611731","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102878278"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23481747,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.909500002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8128204941749573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7051304578781128},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6567251086235046},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5921837091445923},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5745232105255127},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5537711381912231},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5186923742294312},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4178951680660248},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3020363450050354},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10596391558647156}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8128204941749573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7051304578781128},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6567251086235046},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5921837091445923},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5745232105255127},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5537711381912231},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5186923742294312},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4178951680660248},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3020363450050354},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10596391558647156},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/vcip63160.2024.10849818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip63160.2024.10849818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7099999785423279}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null},{"id":"https://openalex.org/F4320329861","display_name":"Natural Science Foundation of Sichuan Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2948672349","https://openalex.org/W2963150697","https://openalex.org/W2963936013","https://openalex.org/W2997998901","https://openalex.org/W3034199269","https://openalex.org/W3096609285","https://openalex.org/W3173859428","https://openalex.org/W3216939881","https://openalex.org/W4284961860","https://openalex.org/W4310557340","https://openalex.org/W4312563428","https://openalex.org/W4312747482","https://openalex.org/W4312773012","https://openalex.org/W4312890493","https://openalex.org/W4386065936","https://openalex.org/W4386066010","https://openalex.org/W4386071547","https://openalex.org/W4386071855","https://openalex.org/W4386075882","https://openalex.org/W4386076396","https://openalex.org/W4388854793","https://openalex.org/W4390872357","https://openalex.org/W4402716321","https://openalex.org/W4402727672","https://openalex.org/W6620707391","https://openalex.org/W6790019176","https://openalex.org/W6791353385"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"Open":[0],"vocabulary":[1,60],"object":[2,61],"detection":[3],"(OVD),":[4],"which":[5,103],"detects":[6],"novel":[7,46,151],"categories":[8],"through":[9],"detectors":[10],"trained":[11],"on":[12,33,131,150],"base":[13],"categories,":[14,152],"has":[15],"achieved":[16],"remarkable":[17],"advancement":[18],"attributable":[19],"to":[20,79,84,119],"large-scale":[21],"vision-language":[22],"models,":[23],"such":[24],"as":[25,77],"CLIP.":[26],"The":[27,94],"prior":[28],"OVD":[29,136],"works":[30],"mainly":[31],"focused":[32],"improving":[34],"the":[35,41,81,121,124,132,139],"classification":[36,106],"accuracy":[37],"of":[38,43,123],"proposals,":[39,102],"ignoring":[40],"ability":[42],"localization":[44,125],"for":[45,58,101,116],"categories.":[47],"In":[48],"this":[49],"work,":[50],"we":[51,64],"propose":[52],"IoU-aware":[53,86,109],"language-image":[54],"model":[55,83],"tuning":[56],"(IoU-CLIP)":[57],"open":[59],"detection.":[62],"Specifically,":[63],"construct":[65],"a":[66],"region":[67],"image":[68],"dataset":[69],"with":[70,105],"different":[71],"IoU":[72,75,99],"and":[73,87,91,110,134,147],"adopt":[74],"values":[76],"labels":[78],"fine-tune":[80],"CLIP":[82],"learn":[85],"class-agnostic":[88,111],"semantic":[89],"prompts":[90],"visual":[92,112],"embeddings.":[93],"fine-tuned":[95],"IoU-CLIP":[96],"can":[97],"predict":[98],"scores":[100],"interact":[104],"scores.":[107],"Meanwhile,":[108],"embeddings":[113],"are":[114],"utilized":[115],"box":[117],"regression":[118],"enhance":[120],"generalization":[122],"capability.":[126],"We":[127],"evaluate":[128],"our":[129],"method":[130],"COCO":[133],"LVIS":[135],"benchmarks,":[137],"outperforming":[138],"baseline":[140],"(RegionCLIP)":[141],"by":[142],"5.5%":[143],"AP<inf":[144],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[145],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">50</inf>":[146],"5.8%":[148],"AP":[149],"respectively,":[153],"achieving":[154],"state-of-the-art":[155],"performance.":[156]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
