{"id":"https://openalex.org/W4409129247","doi":"https://doi.org/10.1109/tmm.2025.3557624","title":"HA-FGOVD: Highlighting Fine-Grained Attributes via Explicit Linear Composition for Open-Vocabulary Object Detection","display_name":"HA-FGOVD: Highlighting Fine-Grained Attributes via Explicit Linear Composition for Open-Vocabulary Object Detection","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4409129247","doi":"https://doi.org/10.1109/tmm.2025.3557624"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3557624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557624","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yuqi Ma","orcid":"https://orcid.org/0009-0004-0209-292X"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuqi Ma","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088499239","display_name":"Mengyin Liu","orcid":"https://orcid.org/0000-0002-9719-8417"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengyin Liu","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101989915","display_name":"Chao Zhu","orcid":"https://orcid.org/0000-0001-5486-7492"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Zhu","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074514262","display_name":"Xu-Cheng Yin","orcid":"https://orcid.org/0000-0003-0023-0220"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu-Cheng Yin","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":1.2181,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.7745808,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"27","issue":null,"first_page":"3171","last_page":"3183"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.92330002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8426648378372192},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.654328465461731},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4819455146789551},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4582575559616089},{"id":"https://openalex.org/keywords/composition","display_name":"Composition (language)","score":0.4487336575984955},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4472615420818329},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3777177333831787},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3357720971107483},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.27557575702667236},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08094066381454468}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8426648378372192},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.654328465461731},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4819455146789551},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4582575559616089},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.4487336575984955},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4472615420818329},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3777177333831787},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3357720971107483},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27557575702667236},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08094066381454468},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3557624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557624","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7900000214576721,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2141617675","display_name":null,"funder_award_id":"62076024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6579475244","display_name":null,"funder_award_id":"62472026","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W56385144","https://openalex.org/W639708223","https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1898560071","https://openalex.org/W2277195237","https://openalex.org/W2886641317","https://openalex.org/W2896457183","https://openalex.org/W2948672349","https://openalex.org/W2963150697","https://openalex.org/W3139434170","https://openalex.org/W3173859428","https://openalex.org/W3176828726","https://openalex.org/W3198377975","https://openalex.org/W3213192039","https://openalex.org/W4310557340","https://openalex.org/W4312424618","https://openalex.org/W4312563428","https://openalex.org/W4312773012","https://openalex.org/W4312956471","https://openalex.org/W4313026212","https://openalex.org/W4313450446","https://openalex.org/W4385245566","https://openalex.org/W4386065936","https://openalex.org/W4386075822","https://openalex.org/W4386076029","https://openalex.org/W4386076396","https://openalex.org/W4387875671","https://openalex.org/W4390707165","https://openalex.org/W4390873448","https://openalex.org/W4391547487","https://openalex.org/W4393147865","https://openalex.org/W4402727200","https://openalex.org/W4402727832","https://openalex.org/W4404612908","https://openalex.org/W6639432524","https://openalex.org/W6748053814","https://openalex.org/W6784333009","https://openalex.org/W6791353385","https://openalex.org/W6802517928","https://openalex.org/W6810171366","https://openalex.org/W6839745749","https://openalex.org/W6842874653","https://openalex.org/W6845354634","https://openalex.org/W6850625674","https://openalex.org/W6854866820","https://openalex.org/W6859925366","https://openalex.org/W6861587938"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W1499958165","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2380193321","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"Open-vocabulary":[0],"object":[1,29],"detection":[2,88,152,203,210],"(OVD)":[3],"models":[4,27,93],"are":[5,122,136],"considered":[6],"to":[7,14,45,107,124,145],"be":[8,159],"Large":[9],"Multi-modal":[10],"Models":[11],"(LMM),":[12],"due":[13],"their":[15,36,63],"extensive":[16],"training":[17],"data":[18],"and":[19,81,132,185,204],"a":[20,79,105,115,147],"large":[21],"number":[22],"of":[23,90,170],"parameters.":[24],"Mainstream":[25],"OVD":[26,92,165],"prioritize":[28],"coarse-grained":[30],"category":[31],"rather":[32],"than":[33],"focus":[34],"on":[35,55,181,189],"fine-grained":[37,71,96,201,209],"attributes,":[38],"e.g.,":[39],"colors":[40],"or":[41,161],"materials,":[42],"thus":[43],"failed":[44],"identify":[46],"objects":[47],"specified":[48],"with":[49,59],"certain":[50],"attributes.":[51,72],"Despite":[52],"being":[53],"pretrained":[54],"large-scale":[56],"image-text":[57],"pairs":[58],"rich":[60],"attribute":[61,109,202],"information,":[62],"latent":[64],"feature":[65,150],"space":[66,144],"does":[67],"not":[68],"highlight":[69],"these":[70],"In":[73],"this":[74],"paper,":[75],"we":[76],"introduce":[77],"HA-FGOVD,":[78],"universal":[80],"explicit":[82,99],"method":[83,196],"that":[84,176,194],"enhances":[85],"the":[86,168,182,190],"attribute-level":[87],"capabilities":[89],"frozen":[91],"by":[94],"highlighting":[95],"attributes":[97],"in":[98,111,128,142,200],"linear":[100,143],"space.":[101],"Our":[102],"approach":[103],"uses":[104],"LLM":[106],"extract":[108],"words":[110],"input":[112],"text":[113,126],"as":[114,139],"zero-shot":[116],"task.":[117],"Then,":[118],"token":[119],"attention":[120],"masks":[121],"adjusted":[123],"guide":[125],"encoders":[127],"extracting":[129],"both":[130],"global":[131],"attribute-specific":[133],"features,":[134],"which":[135],"explicitly":[137],"composited":[138],"two":[140],"vectors":[141],"form":[146],"new":[148],"attribute-highlighted":[149],"for":[151,207],"tasks.":[153],"The":[154],"composition":[155],"weight":[156],"scalars":[157],"can":[158],"learned":[160],"transferred":[162],"across":[163],"different":[164],"models,":[166],"showcasing":[167],"universality":[169],"our":[171,195],"method.":[172],"Experimental":[173],"results":[174],"show":[175],"HA-FGOVD":[177],"achieves":[178],"state-of-the-art":[179],"performance":[180],"FG-OVD":[183],"benchmark":[184],"demonstrates":[186],"promising":[187],"generalization":[188],"OVDEval":[191],"benchmark,":[192],"suggesting":[193],"addresses":[197],"significant":[198],"limitations":[199],"has":[205],"potential":[206],"broader":[208],"applications.":[211]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
