{"id":"https://openalex.org/W4392909058","doi":"https://doi.org/10.1109/icassp48485.2024.10445945","title":"VGDIFFZERO: Text-To-Image Diffusion Models Can Be Zero-Shot Visual Grounders","display_name":"VGDIFFZERO: Text-To-Image Diffusion Models Can Be Zero-Shot Visual Grounders","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909058","doi":"https://doi.org/10.1109/icassp48485.2024.10445945"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10445945","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10445945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100624009","display_name":"Xuyang Liu","orcid":"https://orcid.org/0000-0002-0691-9371"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]},{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]},{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuyang Liu","raw_affiliation_strings":["Sichuan University,College of Electronics and Information Engineering,Chengdu,China","College of Electronics and Information Engineering, Sichuan University, Chengdu, China","School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University,College of Electronics and Information Engineering,Chengdu,China","institution_ids":["https://openalex.org/I24185976"]},{"raw_affiliation_string":"College of Electronics and Information Engineering, Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24201400","https://openalex.org/I24185976"]},{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043876209","display_name":"Siteng Huang","orcid":"https://orcid.org/0000-0002-9735-1186"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siteng Huang","raw_affiliation_strings":["Westlake University,School of Engineering,Hangzhou,China","School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University,School of Engineering,Hangzhou,China","institution_ids":["https://openalex.org/I3133055985"]},{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034585885","display_name":"Yachen Kang","orcid":"https://orcid.org/0000-0002-2979-1286"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yachen Kang","raw_affiliation_strings":["Westlake University,School of Engineering,Hangzhou,China","School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University,School of Engineering,Hangzhou,China","institution_ids":["https://openalex.org/I3133055985"]},{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108147162","display_name":"Honggang Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]},{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Honggang Chen","raw_affiliation_strings":["Sichuan University,College of Electronics and Information Engineering,Chengdu,China","College of Electronics and Information Engineering, Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University,College of Electronics and Information Engineering,Chengdu,China","institution_ids":["https://openalex.org/I24185976"]},{"raw_affiliation_string":"College of Electronics and Information Engineering, Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24201400","https://openalex.org/I24185976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100665183","display_name":"Donglin Wang","orcid":"https://orcid.org/0000-0003-1359-6440"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Donglin Wang","raw_affiliation_strings":["Westlake University,School of Engineering,Hangzhou,China","School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Westlake University,School of Engineering,Hangzhou,China","institution_ids":["https://openalex.org/I3133055985"]},{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100624009"],"corresponding_institution_ids":["https://openalex.org/I24185976","https://openalex.org/I24201400","https://openalex.org/I3133055985"],"apc_list":null,"apc_paid":null,"fwci":2.9123,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.91881072,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2765","last_page":"2769"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10515","display_name":"Cancer-related molecular mechanisms research","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.883630633354187},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8200693130493164},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6046724915504456},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5956963300704956},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5733271241188049},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5543500781059265},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5168910026550293},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5141494870185852},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.48770245909690857},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.48209694027900696},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38982462882995605},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3577864170074463},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3488726019859314},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.05949011445045471}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.883630633354187},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8200693130493164},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6046724915504456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5956963300704956},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5733271241188049},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5543500781059265},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5168910026550293},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5141494870185852},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.48770245909690857},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.48209694027900696},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38982462882995605},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3577864170074463},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3488726019859314},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.05949011445045471},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10445945","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10445945","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2802911279","display_name":null,"funder_award_id":"Young","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5265214819","display_name":null,"funder_award_id":"62176215","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5760752404","display_name":null,"funder_award_id":"Projects","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6059964893","display_name":null,"funder_award_id":"STI 2030","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7546200249","display_name":null,"funder_award_id":"62001316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8018449572","display_name":null,"funder_award_id":"2022ZD0208800","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8955107213","display_name":null,"funder_award_id":"Major","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335581","display_name":"Young Scientists Fund","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1901129140","https://openalex.org/W2251329024","https://openalex.org/W2489434015","https://openalex.org/W2963109634","https://openalex.org/W2964345792","https://openalex.org/W3173220247","https://openalex.org/W4306820534","https://openalex.org/W4312933868","https://openalex.org/W4377130816","https://openalex.org/W4380994554","https://openalex.org/W4382464354","https://openalex.org/W4386057725","https://openalex.org/W4386076148","https://openalex.org/W4386076636","https://openalex.org/W4386083151","https://openalex.org/W4390872669","https://openalex.org/W4390873429","https://openalex.org/W4391274652","https://openalex.org/W4391451889","https://openalex.org/W4392904512","https://openalex.org/W6679045638","https://openalex.org/W6779823529","https://openalex.org/W6791353385","https://openalex.org/W6846007759","https://openalex.org/W6853472504","https://openalex.org/W6853573902"],"related_works":["https://openalex.org/W4396941953","https://openalex.org/W2093104230","https://openalex.org/W2987280934","https://openalex.org/W4390874210","https://openalex.org/W4384918963","https://openalex.org/W4365211920","https://openalex.org/W2128027845","https://openalex.org/W3014948380","https://openalex.org/W4386184937","https://openalex.org/W4241564561"],"abstract_inverted_index":{"Large-scale":[0],"text-to-image":[1,82],"diffusion":[2,51,83],"models":[3],"have":[4],"shown":[5],"impressive":[6],"capabilities":[7],"for":[8],"generative":[9,50],"tasks":[10,22],"by":[11],"leveraging":[12],"strong":[13,113],"vision-language":[14,20],"alignment":[15],"from":[16],"pre-training.":[17],"However,":[18],"most":[19],"discriminative":[21,56],"require":[23],"extensive":[24],"fine-tuning":[25,63],"on":[26,81,104,115],"carefully-labeled":[27],"datasets":[28],"to":[29,53],"acquire":[30],"such":[31],"alignment,":[32],"with":[33],"great":[34],"cost":[35],"in":[36],"time":[37],"and":[38,64,95,107],"computing":[39],"resources.":[40],"In":[41],"this":[42],"work,":[43],"we":[44,69],"explore":[45],"directly":[46],"applying":[47],"a":[48,72,88],"pre-trained":[49],"model":[52],"the":[54],"challenging":[55],"task":[57],"of":[58,98],"visual":[59,77,117],"grounding":[60,78],"without":[61],"any":[62],"additional":[65],"training":[66],"dataset.":[67],"Specifically,":[68],"propose":[70],"VGDiffZero,":[71],"simple":[73],"yet":[74],"effective":[75],"zero-shot":[76,116],"framework":[79],"based":[80],"models.":[84],"We":[85],"also":[86],"design":[87],"comprehensive":[89],"region-scoring":[90],"method":[91],"considering":[92],"both":[93],"global":[94],"local":[96],"contexts":[97],"each":[99],"isolated":[100],"proposal.":[101],"Extensive":[102],"experiments":[103],"RefCOCO,":[105],"RefCOCO+,":[106],"RefCOCOg":[108],"show":[109],"that":[110],"VGDiffZero":[111],"achieves":[112],"performance":[114],"grounding.":[118],"Our":[119],"code":[120],"is":[121],"available":[122],"at":[123],"https://github.com/xuyang-liu16/VGDiffZero.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
