{"id":"https://openalex.org/W4395064954","doi":"https://doi.org/10.1145/3664647.3681071","title":"HiVG: Hierarchical Multimodal Fine-grained Modulation for Visual Grounding","display_name":"HiVG: Hierarchical Multimodal Fine-grained Modulation for Visual Grounding","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4395064954","doi":"https://doi.org/10.1145/3664647.3681071"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681071","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681071","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3664647.3681071","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100680501","display_name":"Linhui Xiao","orcid":"https://orcid.org/0000-0003-2592-5264"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Linhui Xiao","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2592-5264","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083991825","display_name":"Xiaoshan Yang","orcid":"https://orcid.org/0000-0001-5453-9755"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoshan Yang","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5453-9755","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Fang Peng","orcid":"https://orcid.org/0000-0002-3948-7413"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fang Peng","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3948-7413","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000037237","display_name":"Yaowei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaowei Wang","raw_affiliation_strings":["Pengcheng Laboratory &amp; Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-6110-4036","affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory &amp; Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022636178","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8343-9665","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences &amp; Pengcheng Laboratory, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100680501"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":4.6039,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.95835722,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5460","last_page":"5469"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9760000109672546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9760000109672546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9591000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.5365927219390869},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5005745887756348},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.4385848939418793},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.20879393815994263},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14752760529518127},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.11590754985809326},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.09960970282554626}],"concepts":[{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.5365927219390869},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5005745887756348},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.4385848939418793},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.20879393815994263},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14752760529518127},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.11590754985809326},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.09960970282554626}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3664647.3681071","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681071","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2404.13400","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.13400","pdf_url":"https://arxiv.org/pdf/2404.13400","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681071","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681071","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1100260481","display_name":null,"funder_award_id":"62072455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1498893086","display_name":null,"funder_award_id":"62036012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1692201237","display_name":null,"funder_award_id":"PCL2023A08","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2246614840","display_name":null,"funder_award_id":"2021ZD0112200","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G3087610523","display_name":null,"funder_award_id":"U23A20387","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6544960559","display_name":null,"funder_award_id":"62322212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1773149199","https://openalex.org/W1933349210","https://openalex.org/W2194775991","https://openalex.org/W2927562344","https://openalex.org/W2946086442","https://openalex.org/W2952524542","https://openalex.org/W2962766617","https://openalex.org/W2962784628","https://openalex.org/W2963109634","https://openalex.org/W2963351448","https://openalex.org/W2963735856","https://openalex.org/W2964345792","https://openalex.org/W2987734933","https://openalex.org/W2996455784","https://openalex.org/W3090449556","https://openalex.org/W3112077297","https://openalex.org/W3138516171","https://openalex.org/W3152619510","https://openalex.org/W3159619744","https://openalex.org/W3171547673","https://openalex.org/W3216551675","https://openalex.org/W4205817612","https://openalex.org/W4214490042","https://openalex.org/W4285192809","https://openalex.org/W4309181071","https://openalex.org/W4312092966","https://openalex.org/W4312351586","https://openalex.org/W4312446817","https://openalex.org/W4313068342","https://openalex.org/W4320036901","https://openalex.org/W4382464354","https://openalex.org/W4385571791","https://openalex.org/W4386066484","https://openalex.org/W4386076636","https://openalex.org/W4387969564","https://openalex.org/W4391216149","https://openalex.org/W4401043311","https://openalex.org/W4401452744","https://openalex.org/W4402778495"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2021787609","https://openalex.org/W2390279801","https://openalex.org/W2097328689","https://openalex.org/W2358668433","https://openalex.org/W4234899305","https://openalex.org/W1537063595","https://openalex.org/W2379604501","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Visual":[0],"grounding,":[1,123],"which":[2],"aims":[3],"to":[4,27,55,75,148],"ground":[5],"a":[6,13,81,96,102,126,152],"visual":[7,29,117,130],"region":[8],"via":[9],"natural":[10],"language,":[11],"is":[12],"task":[14,59,68],"that":[15],"heavily":[16],"relies":[17],"on":[18,61,157],"cross-modal":[19,99,110,144],"alignment.":[20],"Existing":[21],"works":[22],"utilized":[23],"uni-modal":[24],"pre-trained":[25],"models":[26],"transfer":[28],"or":[30],"linguistic":[31],"knowledge":[32],"separately":[33],"while":[34],"ignoring":[35],"the":[36,57,114,136,143,161,168],"multimodal":[37,62,86,104],"corresponding":[38],"information.":[39],"Motivated":[40],"by":[41,141],"recent":[42],"advancements":[43],"in":[44,151],"contrastive":[45],"language-image":[46],"pre-training":[47,71],"and":[48,72,83,101,119,124,131,166],"low-rank":[49,105],"adaptation":[50,106],"(LoRA)":[51],"methods,":[52],"we":[53,79],"aim":[54],"solve":[56],"grounding":[58,170],"based":[60],"pre-training.":[63],"However,":[64],"there":[65],"exists":[66],"significant":[67,169],"gaps":[69],"between":[70,116,128],"grounding.":[73],"Therefore,":[74],"address":[76,113],"these":[77],"gaps,":[78],"propose":[80],"concise":[82],"efficient":[84],"hierarchical":[85,103,153],"fine-grained":[87],"modulation":[88],"framework,":[89],"namely":[90],"HiVG.":[91],"Specifically,":[92],"HiVG":[93],"consists":[94],"of":[95,138,163],"multi-layer":[97],"adaptive":[98],"bridge":[100,111],"(HiLoRA)":[107],"paradigm.":[108],"The":[109,179],"can":[112],"inconsistency":[115],"features":[118,145],"those":[120],"required":[121],"for":[122],"establish":[125],"connection":[127],"multi-level":[129],"text":[132],"features.":[133],"HiLoRA":[134],"prevents":[135],"accumulation":[137],"perceptual":[139],"errors":[140],"adapting":[142],"from":[146],"shallow":[147],"deep":[149],"layers":[150],"manner.":[154],"Experimental":[155],"results":[156],"five":[158],"datasets":[159],"demonstrate":[160],"effectiveness":[162],"our":[164],"approach":[165],"showcase":[167],"capabilities":[171],"as":[172,174],"well":[173],"promising":[175],"energy":[176],"efficiency":[177],"advantages.":[178],"project":[180],"page:":[181],"https://github.com/linhuixiao/HiVG.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
