{"id":"https://openalex.org/W4387272106","doi":"https://doi.org/10.1109/tmm.2023.3321501","title":"CLIP-VG: Self-Paced Curriculum Adapting of CLIP for Visual Grounding","display_name":"CLIP-VG: Self-Paced Curriculum Adapting of CLIP for Visual Grounding","publication_year":2023,"publication_date":"2023-10-02","ids":{"openalex":"https://openalex.org/W4387272106","doi":"https://doi.org/10.1109/tmm.2023.3321501"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2023.3321501","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3321501","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100680501","display_name":"Linhui Xiao","orcid":"https://orcid.org/0000-0003-2592-5264"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Linhui Xiao","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","Peng Cheng Laboratory (PCL), Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-2592-5264","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Peng Cheng Laboratory (PCL), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083991825","display_name":"Xiaoshan Yang","orcid":"https://orcid.org/0000-0001-5453-9755"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoshan Yang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","Peng Cheng Laboratory (PCL), Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-5453-9755","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Peng Cheng Laboratory (PCL), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101801577","display_name":"Fang Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fang Peng","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","Peng Cheng Laboratory (PCL), Shenzhen, China","School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3948-7413","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Peng Cheng Laboratory (PCL), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100452477","display_name":"Ming Yan","orcid":"https://orcid.org/0000-0003-4959-8878"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Yan","raw_affiliation_strings":["DAMO Academy, Alibaba Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-4959-8878","affiliations":[{"raw_affiliation_string":"DAMO Academy, Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100631216","display_name":"Yaowei Wang","orcid":"https://orcid.org/0000-0003-2197-9038"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaowei Wang","raw_affiliation_strings":["Peng Cheng Laboratory, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-2197-9038","affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022636178","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","Peng Cheng Laboratory (PCL), Shenzhen, China","School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8343-9665","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Peng Cheng Laboratory (PCL), Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences (UCAS), Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100680501"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210112150","https://openalex.org/I4210136793","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":6.5487,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.97782731,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"26","issue":null,"first_page":"4334","last_page":"4347"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8099141120910645},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.5838427543640137},{"id":"https://openalex.org/keywords/curriculum","display_name":"Curriculum","score":0.4901030957698822},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.35524633526802063},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.17793706059455872},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09192532300949097},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07126131653785706},{"id":"https://openalex.org/keywords/pedagogy","display_name":"Pedagogy","score":0.059414029121398926}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8099141120910645},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.5838427543640137},{"id":"https://openalex.org/C47177190","wikidata":"https://www.wikidata.org/wiki/Q207137","display_name":"Curriculum","level":2,"score":0.4901030957698822},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35524633526802063},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.17793706059455872},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09192532300949097},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07126131653785706},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.059414029121398926}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2023.3321501","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3321501","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1100260481","display_name":null,"funder_award_id":"62072455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1498893086","display_name":null,"funder_award_id":"62036012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6544960559","display_name":null,"funder_award_id":"62322212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1773149199","https://openalex.org/W1933349210","https://openalex.org/W2251512949","https://openalex.org/W2296073425","https://openalex.org/W2345837149","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2904549000","https://openalex.org/W2904910963","https://openalex.org/W2946086442","https://openalex.org/W2950695840","https://openalex.org/W2962766617","https://openalex.org/W2963042258","https://openalex.org/W2963109634","https://openalex.org/W2963445828","https://openalex.org/W2963614783","https://openalex.org/W2963735856","https://openalex.org/W2963800628","https://openalex.org/W2964345792","https://openalex.org/W2981663434","https://openalex.org/W2984121207","https://openalex.org/W2984194315","https://openalex.org/W2986803748","https://openalex.org/W2987401211","https://openalex.org/W2989176720","https://openalex.org/W3034655362","https://openalex.org/W3034772468","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3112077297","https://openalex.org/W3117585461","https://openalex.org/W3120329650","https://openalex.org/W3123742938","https://openalex.org/W3126391825","https://openalex.org/W3142849873","https://openalex.org/W3159619744","https://openalex.org/W3163747765","https://openalex.org/W3170767867","https://openalex.org/W3171547673","https://openalex.org/W3174697615","https://openalex.org/W3178418424","https://openalex.org/W3179041377","https://openalex.org/W3186567887","https://openalex.org/W3213511181","https://openalex.org/W4205421564","https://openalex.org/W4214490042","https://openalex.org/W4224807381","https://openalex.org/W4225517085","https://openalex.org/W4226104391","https://openalex.org/W4285192809","https://openalex.org/W4289126595","https://openalex.org/W4293868331","https://openalex.org/W4307106676","https://openalex.org/W4312351586","https://openalex.org/W4312773012","https://openalex.org/W4320036901","https://openalex.org/W4366352717","https://openalex.org/W4385245566","https://openalex.org/W4386071687","https://openalex.org/W4391451889","https://openalex.org/W6679390333","https://openalex.org/W6757135208","https://openalex.org/W6766648584","https://openalex.org/W6791353385","https://openalex.org/W6797397777","https://openalex.org/W6798805250","https://openalex.org/W6803567076","https://openalex.org/W6810290167","https://openalex.org/W6810447287"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2021787609","https://openalex.org/W2390279801","https://openalex.org/W1537063595","https://openalex.org/W2097328689","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4234899305"],"abstract_inverted_index":{"Visual":[0],"Grounding":[1],"(VG)":[2],"is":[3,51],"a":[4,17,91,107,152,171],"crucial":[5],"topic":[6],"in":[7,177],"the":[8,28,45,55,79,116,121,126,159,165],"field":[9],"of":[10,47,57,86,100,118],"vision":[11,72],"and":[12,59,73,82,133,156,180,190],"language,":[13],"which":[14,138],"involves":[15],"locating":[16],"specific":[18],"region":[19],"described":[20],"by":[21,170],"expressions":[22],"within":[23],"an":[24,147],"image.":[25],"To":[26],"reduce":[27],"reliance":[29],"on":[30,54,125,174],"manually":[31],"labeled":[32],"data,":[33],"unsupervised":[34,49,168],"methods":[35,50,61],"have":[36],"been":[37],"developed":[38],"to":[39,70,77,114,120,145,188,192],"locate":[40],"regions":[41],"using":[42],"pseudo-labels.":[43],"However,":[44],"performance":[46],"existing":[48,200],"highly":[52],"dependent":[53],"quality":[56],"pseudo-labels":[58,144],"these":[60],"always":[62],"encounter":[63],"issues":[64],"with":[65,102,183],"limited":[66],"diversity.":[67],"In":[68],"order":[69],"utilize":[71],"language":[74],"pre-trained":[75],"models":[76],"address":[78],"grounding":[80],"problem,":[81],"reasonably":[83],"take":[84],"advantage":[85],"pseudo-labels,":[87],"we":[88,129],"propose":[89,106,131],"CLIP-VG,":[90],"novel":[92],"method":[93,163,169],"that":[94],"can":[95,139],"conduct":[96],"self-paced":[97],"curriculum":[98,135],"adapting":[99,136],"CLIP":[101,119],"pseudo-language":[103,160],"labels.":[104,161],"We":[105],"simple":[108],"yet":[109],"efficient":[110],"end-to-end":[111],"network":[112],"architecture":[113],"realize":[115],"transfer":[117],"visual":[122],"grounding.":[123],"Based":[124],"CLIP-based":[127],"architecture,":[128],"further":[130],"single-source":[132,179],"multi-source":[134,181],"algorithms,":[137],"progressively":[140],"find":[141],"more":[142],"reliable":[143],"learn":[146],"optimal":[148],"model,":[149],"thereby":[150],"achieving":[151],"balance":[153],"between":[154],"reliability":[155],"diversity":[157],"for":[158],"Our":[162],"outperforms":[164,199],"current":[166],"state-of-the-art":[167],"significant":[172],"margin":[173],"RefCOCO/+/g":[175],"datasets":[176],"both":[178],"scenarios,":[182],"improvements":[184],"ranging":[185],"from":[186],"6.78%":[187],"10.67%":[189],"11.39%":[191],"14.87%,":[193],"respectively.":[194],"Furthermore,":[195],"our":[196],"approach":[197],"even":[198],"weakly":[201],"supervised":[202],"methods.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":40},{"year":2024,"cited_by_count":10}],"updated_date":"2026-05-12T08:28:47.272897","created_date":"2025-10-10T00:00:00"}
