{"id":"https://openalex.org/W4403791264","doi":"https://doi.org/10.1145/3664647.3681058","title":"QueryMatch: A Query-based Contrastive Learning Framework for Weakly Supervised Visual Grounding","display_name":"QueryMatch: A Query-based Contrastive Learning Framework for Weakly Supervised Visual Grounding","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791264","doi":"https://doi.org/10.1145/3664647.3681058"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681058","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000088252","display_name":"S.L. Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shengxin Chen","raw_affiliation_strings":["Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102997988","display_name":"Gen Luo","orcid":"https://orcid.org/0000-0001-5334-1843"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gen Luo","raw_affiliation_strings":["Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091531008","display_name":"Yiyi Zhou","orcid":"https://orcid.org/0000-0002-5110-4526"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiyi Zhou","raw_affiliation_strings":["Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059926864","display_name":"Xiaoshuai Sun","orcid":"https://orcid.org/0000-0003-3912-9306"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoshuai Sun","raw_affiliation_strings":["Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020568230","display_name":"Guannan Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guannan Jiang","raw_affiliation_strings":["Intelligent Manufacturing Department, Contemporary Amperex Technology Co., Limited, Ningde, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Intelligent Manufacturing Department, Contemporary Amperex Technology Co., Limited, Ningde, Fujian, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016080094","display_name":"Rongrong Ji","orcid":"https://orcid.org/0000-0001-9163-2932"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongrong Ji","raw_affiliation_strings":["Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5000088252"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":1.0497,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.78662432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4177","last_page":"4186"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8148322105407715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5814036130905151},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.5369387269020081},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4181118607521057},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06064736843109131}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8148322105407715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5814036130905151},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.5369387269020081},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4181118607521057},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06064736843109131},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681058","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2193145675","https://openalex.org/W2798556392","https://openalex.org/W2955014993","https://openalex.org/W2963037989","https://openalex.org/W2963109634","https://openalex.org/W2963351448","https://openalex.org/W2964284374","https://openalex.org/W2980088508","https://openalex.org/W2981663434","https://openalex.org/W2987401211","https://openalex.org/W3035524453","https://openalex.org/W3096609285","https://openalex.org/W3103542727","https://openalex.org/W3106250896","https://openalex.org/W3117585461","https://openalex.org/W3138516171","https://openalex.org/W3178418424","https://openalex.org/W3179041377","https://openalex.org/W3206582857","https://openalex.org/W3216551675","https://openalex.org/W4200631575","https://openalex.org/W4212774754","https://openalex.org/W4214650614","https://openalex.org/W4224304134","https://openalex.org/W4226396876","https://openalex.org/W4282028729","https://openalex.org/W4283312843","https://openalex.org/W4283688199","https://openalex.org/W4294310675","https://openalex.org/W4312815172","https://openalex.org/W4312980231","https://openalex.org/W4386071755","https://openalex.org/W4386071798","https://openalex.org/W4390871756","https://openalex.org/W4390873170","https://openalex.org/W4390873641","https://openalex.org/W4401109855"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2021787609","https://openalex.org/W2390279801","https://openalex.org/W1537063595","https://openalex.org/W2097328689","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Visual":[0],"grounding":[1,109,188],"is":[2],"a":[3,11,66,87,111],"task":[4],"of":[5,89,151,172,186,206,233],"locating":[6],"the":[7,34,39,58,120,149,168,203,231],"object":[8,53],"referred":[9],"by":[10,155],"natural":[12],"language":[13],"description.":[14],"To":[15,174],"reduce":[16],"annotation":[17],"costs,":[18],"recent":[19],"researchers":[20],"are":[21,46,241],"devoted":[22],"into":[23],"one-stage":[24,69],"weakly":[25,72,106,135,169,236],"supervised":[26,73,107,136,170,237],"methods":[27],"for":[28,71,133],"visual":[29,74,99,108],"grounding,":[30,75],"which":[31,55,92,115],"typically":[32],"adopt":[33],"anchor-text":[35],"matching":[36,113],"paradigm.":[37],"Despite":[38],"efficiency,":[40],"we":[41,64,127,178],"identify":[42],"that":[43],"anchor":[44],"representations":[45],"often":[47],"noisy":[48],"and":[49,195,220],"insufficient":[50],"to":[51,147],"describe":[52],"information,":[54],"inevitably":[56],"hinders":[57],"vision-language":[59],"alignments.":[60],"In":[61,101,143],"this":[62,102,162],"paper,":[63],"propose":[65,129],"novel":[67],"query-based":[68,121,152],"framework":[70],"namely":[76,138],"QueryMatch.":[77,173],"Different":[78],"from":[79],"previous":[80],"work,":[81],"QueryMatch":[82,104,207],"represents":[83],"candidate":[84],"objects":[85],"with":[86,98],"set":[88],"query":[90,159],"features,":[91],"inherently":[93],"establish":[94],"accurate":[95],"one-to-one":[96],"associations":[97],"objects.":[100],"case,":[103],"re-formulates":[105],"as":[110],"query-text":[112],"problem,":[114],"can":[116,165],"be":[117],"optimized":[118],"via":[119],"contrastive":[122,153],"learning.":[123,238],"Based":[124],"on":[125,182,216,224],"QueryMatch,":[126],"further":[128],"an":[130],"innovative":[131],"strategy":[132],"effective":[134],"learning,":[137],"Active":[139],"Query":[140],"Selection":[141],"(AQS).":[142],"particular,":[144],"AQS":[145,164,234],"aims":[146],"enhance":[148],"effectiveness":[150,232],"learning":[154,171],"actively":[156],"selecting":[157],"high-quality":[158],"features.":[160],"Through":[161],"strategy,":[163],"greatly":[166],"benefit":[167],"validate":[175],"our":[176],"approach,":[177],"conduct":[179],"extensive":[180],"experiments":[181],"three":[183],"benchmark":[184],"datasets":[185],"two":[187,209],"tasks,":[189,210],"i.e.,":[190],"referring":[191],"expression":[192],"comprehension":[193],"(REC)":[194],"segmentation":[196],"(RES).":[197],"Experimental":[198],"results":[199],"not":[200],"only":[201],"show":[202],"state-of-art":[204],"performance":[205],"in":[208,218,226,235],"e.g.,":[211],"over":[212,221],"+5%":[213],"[email":[214],"protected]":[215],"RefCOCO":[217,225],"REC":[219],"+20%":[222],"mIOU":[223],"RES,":[227],"but":[228],"also":[229],"confirm":[230],"Source":[239],"codes":[240],"available":[242],"at":[243],"https://github.com/TensorThinker/QueryMatch.":[244]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
