{"id":"https://openalex.org/W4415709096","doi":"https://doi.org/10.1109/icme59968.2025.11210032","title":"CLIP Brings Better Features to Visual Aesthetics Learners","display_name":"CLIP Brings Better Features to Visual Aesthetics Learners","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415709096","doi":"https://doi.org/10.1109/icme59968.2025.11210032"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11210032","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210032","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040844494","display_name":"Liwu Xu","orcid":"https://orcid.org/0009-0004-3489-4169"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liwu Xu","raw_affiliation_strings":["OPPO AI Center"],"affiliations":[{"raw_affiliation_string":"OPPO AI Center","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113220383","display_name":"Jinjin Xu","orcid":"https://orcid.org/0000-0002-2428-0305"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinjin Xu","raw_affiliation_strings":["OPPO AI Center"],"affiliations":[{"raw_affiliation_string":"OPPO AI Center","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027089869","display_name":"Yuzhe Yang","orcid":"https://orcid.org/0000-0001-9098-2105"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuzhe Yang","raw_affiliation_strings":["OPPO AI Center"],"affiliations":[{"raw_affiliation_string":"OPPO AI Center","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101710707","display_name":"Xilu Wang","orcid":"https://orcid.org/0000-0002-0926-4454"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xilu Wang","raw_affiliation_strings":["University of Surrey"],"affiliations":[{"raw_affiliation_string":"University of Surrey","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101266070","display_name":"Yi-Jie Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi-Jie Huang","raw_affiliation_strings":["OPPO AI Center"],"affiliations":[{"raw_affiliation_string":"OPPO AI Center","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075992428","display_name":"Yaqian Li","orcid":"https://orcid.org/0000-0003-3582-9997"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaqian Li","raw_affiliation_strings":["OPPO AI Center"],"affiliations":[{"raw_affiliation_string":"OPPO AI Center","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5040844494"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1919,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84379184,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.5062999725341797,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.5062999725341797,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3682999908924103,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.029899999499320984,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6621999740600586},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5703999996185303},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5375999808311462},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5055000185966492},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4593000113964081},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.42750000953674316}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6868000030517578},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6621999740600586},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5703999996185303},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5375999808311462},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5357999801635742},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5055000185966492},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.48570001125335693},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4593000113964081},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3630000054836273},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3400000035762787},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.320499986410141},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2858999967575073},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.26420000195503235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11210032","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11210032","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2078807908","https://openalex.org/W2108598243","https://openalex.org/W2170658603","https://openalex.org/W2417288846","https://openalex.org/W2467531333","https://openalex.org/W2754213847","https://openalex.org/W2897926040","https://openalex.org/W2931027027","https://openalex.org/W2963163009","https://openalex.org/W2963289467","https://openalex.org/W2976886057","https://openalex.org/W3015735225","https://openalex.org/W3035523707","https://openalex.org/W3171472210","https://openalex.org/W3198675127","https://openalex.org/W4214745154","https://openalex.org/W4280563717","https://openalex.org/W4281846337","https://openalex.org/W4285606417","https://openalex.org/W4285787548","https://openalex.org/W4287322212","https://openalex.org/W4288083516","https://openalex.org/W4312353506","https://openalex.org/W4312933868","https://openalex.org/W4312960937","https://openalex.org/W4322576760","https://openalex.org/W4382462760","https://openalex.org/W4386076169","https://openalex.org/W4386076227","https://openalex.org/W4386076522"],"related_works":[],"abstract_inverted_index":{"Image":[0],"Aesthetics":[1],"Assessment":[2],"(IAA)":[3],"is":[4],"a":[5,90,104,117,155],"challenging":[6],"task":[7],"due":[8],"to":[9,42,53,80,102,121,141,150,214],"its":[10],"subjective":[11],"nature":[12],"and":[13,44,75,92,129,172,199,202],"expensive":[14],"manual":[15],"annotations.":[16],"Recent":[17],"large-scale":[18],"vision-language":[19],"models,":[20,132,145,216],"such":[21],"as":[22],"Contrastive":[23],"Language-Image":[24],"Pre-training":[25],"(CLIP),":[26],"have":[27,58],"shown":[28],"their":[29],"promising":[30],"representation":[31,213,235],"capability":[32],"for":[33,170,223],"various":[34],"downstream":[35,151],"tasks.":[36],"However,":[37],"the":[38,123,158,174,182,207,224,232],"application":[39],"of":[40,125,196,210,227,236],"CLIP":[41,55,127],"resource-constrained":[43],"low-data":[45,156],"IAA":[46,57,106,130,144,152,192,215,228,237],"tasks":[47,153],"remains":[48],"limited.":[49],"While":[50],"few":[51],"attempts":[52],"leverage":[54],"in":[56,154],"mainly":[59],"focused":[60],"on":[61,188],"carefully":[62],"designed":[63],"prompts,":[64],"we":[65,88],"extend":[66],"beyond":[67],"this":[68],"by":[69],"allowing":[70],"models":[71],"from":[72,83,137],"different":[73,77],"domains":[74],"with":[76,167],"model":[78,107,225],"sizes":[79],"acquire":[81],"knowledge":[82,176],"CLIP.":[84],"To":[85,147],"achieve":[86],"this,":[87],"propose":[89],"unified":[91],"flexible":[93],"two-phase":[94],"CLIP-based":[95],"Semi-supervised":[96],"Knowledge":[97],"Distillation":[98],"(CSKD)":[99],"paradigm,":[100],"aiming":[101],"learn":[103],"lightweight":[105,143],"while":[108],"leveraging":[109],"CLIP\u2019s":[110,211],"strong":[111,160],"generalization":[112],"capability.":[113],"Specifically,":[114],"CSKD":[115,184],"employs":[116],"feature":[118,204,212,234],"alignment":[119,205],"strategy":[120],"facilitate":[122],"distillation":[124,166],"heterogeneous":[126],"teacher":[128],"student":[131],"effectively":[133],"transferring":[134,173],"valuable":[135,221],"features":[136],"pre-trained":[138],"visual":[139,161],"representations":[140],"two":[142,159],"respectively.":[146],"efficiently":[148],"adapt":[149],"regime,":[157],"aesthetics":[162],"learners":[163],"then":[164],"conduct":[165],"unlabeled":[168],"examples":[169],"refining":[171],"task-specific":[175],"collaboratively.":[177],"Extensive":[178],"experiments":[179],"demonstrate":[180],"that":[181],"proposed":[183],"achieves":[185],"state-of-the-art":[186],"performance":[187],"multiple":[189],"widely":[190],"used":[191],"benchmarks.":[193],"Furthermore,":[194],"analysis":[195],"attention":[197],"distance":[198],"entropy":[200],"before":[201],"after":[203],"shows":[206],"effective":[208],"transfer":[209],"which":[217],"not":[218],"only":[219],"provides":[220],"guidance":[222],"initialization":[226],"but":[229],"also":[230],"enhances":[231],"aesthetic":[233],"models.":[238],"Code":[239],"will":[240],"be":[241],"made":[242],"publicly":[243],"available.":[244]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-30T00:00:00"}
