{"id":"https://openalex.org/W4413158161","doi":"https://doi.org/10.1109/cvpr52734.2025.02315","title":"Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection","display_name":"Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413158161","doi":"https://doi.org/10.1109/cvpr52734.2025.02315"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.02315","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.02315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082756213","display_name":"Gensheng Pei","orcid":"https://orcid.org/0000-0002-7677-7487"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gensheng Pei","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052423709","display_name":"Tao Chen","orcid":"https://orcid.org/0000-0003-2051-7798"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Chen","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100767800","display_name":"Yujia Wang","orcid":"https://orcid.org/0000-0002-6402-3514"},"institutions":[{"id":"https://openalex.org/I1328775524","display_name":"Zhejiang Sci-Tech University","ror":"https://ror.org/03893we55","country_code":"CN","type":"education","lineage":["https://openalex.org/I1328775524"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujia Wang","raw_affiliation_strings":["Zhejiang Sci-Tech University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Sci-Tech University","institution_ids":["https://openalex.org/I1328775524"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078057978","display_name":"Xinhao Cai","orcid":"https://orcid.org/0009-0009-5459-3458"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhao Cai","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040437528","display_name":"Xiangbo Shu","orcid":"https://orcid.org/0000-0003-4902-4663"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangbo Shu","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091518967","display_name":"Tianfei Zhou","orcid":"https://orcid.org/0000-0001-5475-1473"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianfei Zhou","raw_affiliation_strings":["Beijing Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027545344","display_name":"Yazhou Yao","orcid":"https://orcid.org/0000-0002-0337-9410"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yazhou Yao","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.6106,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94114952,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"24862","last_page":"24872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12171","display_name":"Open Education and E-Learning","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12171","display_name":"Open Education and E-Learning","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.19089999794960022,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.1826000064611435,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6336307525634766},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5574724078178406},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21317636966705322}],"concepts":[{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6336307525634766},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5574724078178406},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21317636966705322}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.02315","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.02315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W1861492603","https://openalex.org/W2017745767","https://openalex.org/W2017814585","https://openalex.org/W2031489346","https://openalex.org/W2108598243","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2250384498","https://openalex.org/W2886641317","https://openalex.org/W2970231061","https://openalex.org/W3037492894","https://openalex.org/W3169761117","https://openalex.org/W3173874704","https://openalex.org/W3176641147","https://openalex.org/W3177096435","https://openalex.org/W3198377975","https://openalex.org/W4312443924","https://openalex.org/W4312526532","https://openalex.org/W4312629998","https://openalex.org/W4312804044","https://openalex.org/W4312891522","https://openalex.org/W4313156423","https://openalex.org/W4386065512","https://openalex.org/W4386075997","https://openalex.org/W4386076084","https://openalex.org/W4386076522","https://openalex.org/W4390872191","https://openalex.org/W4390874126","https://openalex.org/W4392172801","https://openalex.org/W4402114612","https://openalex.org/W4402667905","https://openalex.org/W4402703126","https://openalex.org/W4402727234","https://openalex.org/W4402727766","https://openalex.org/W4402753899","https://openalex.org/W4402780389"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"CLIP":[1],"model":[2],"has":[3],"demonstrated":[4],"significant":[5],"advancements":[6],"in":[7,75,116,190,199],"aligning":[8],"visual":[9,79],"and":[10,23,42,81,163,193,202],"language":[11,203],"modalities":[12],"through":[13],"large-scale":[14],"pre-training":[15],"on":[16,26,38,54],"image-text":[17],"pairs,":[18],"enabling":[19],"strong":[20],"zero-shot":[21,191],"classification":[22,192],"retrieval":[24,194],"capabilities":[25],"various":[27],"domains.":[28],"However,":[29],"CLIP\u2019s":[30,100],"training":[31,63,101],"remains":[32],"computationally":[33],"intensive,":[34],"with":[35,169],"high":[36],"demands":[37],"both":[39],"data":[40],"processing":[41],"memory.":[43],"To":[44],"address":[45],"these":[46,67],"challenges,":[47],"recent":[48],"masking":[49,114],"strategies":[50],"have":[51],"emerged,":[52],"focusing":[53],"the":[55,138,148,151,159,174],"selective":[56],"removal":[57],"of":[58,121,150],"image":[59,140],"patches":[60,123,162,166],"to":[61,98,141,177],"improve":[62],"efficiency.":[64],"Although":[65],"effective,":[66],"methods":[68],"often":[69],"compromise":[70],"key":[71],"semantic":[72,106],"information,":[73],"resulting":[74],"suboptimal":[76],"alignment":[77],"between":[78,158],"features":[80],"text":[82],"descriptions.":[83],"In":[84],"this":[85],"work,":[86],"we":[87,132],"present":[88],"a":[89,112,118,179],"concise":[90],"yet":[91],"effective":[92],"approach":[93],"called":[94],"Patch":[95],"Generation-to-Selection":[96],"(CLIP-PGS)":[97],"enhance":[99],"efficiency":[102],"while":[103],"preserving":[104],"critical":[105],"con":[107],"tent.":[108],"Our":[109,183],"method":[110],"introduces":[111],"gradual":[113],"process":[115,176],"which":[117],"small":[119],"set":[120],"candidate":[122,160],"is":[124],"first":[125],"pre-selected":[126],"as":[127],"potential":[128],"mask":[129,145,161],"regions.":[130],"Then,":[131],"apply":[133],"Sobel":[134],"edge":[135,144],"detection":[136],"across":[137],"entire":[139],"generate":[142],"an":[143],"that":[146],"prioritizes":[147],"retention":[149],"primary":[152],"object":[153],"areas.":[154],"Finally,":[155],"similarity":[156,181],"scores":[157],"their":[164],"neighboring":[165],"are":[167],"computed,":[168],"optimal":[170],"transport":[171],"normalization":[172],"refining":[173],"selection":[175],"ensure":[178],"balanced":[180],"matrix.":[182],"approach,":[184],"CLIP-PGS,":[185],"sets":[186],"new":[187],"state-of-the-art":[188],"results":[189],"tasks,":[195],"achieving":[196],"superior":[197],"performance":[198],"robustness":[200],"evaluation":[201],"compositionality":[204],"benchmarks.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
