{"id":"https://openalex.org/W4403792379","doi":"https://doi.org/10.1145/3664647.3680649","title":"AesExpert: Towards Multi-modality Foundation Model for Image Aesthetics Perception","display_name":"AesExpert: Towards Multi-modality Foundation Model for Image Aesthetics Perception","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403792379","doi":"https://doi.org/10.1145/3664647.3680649"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016576297","display_name":"Yipo Huang","orcid":"https://orcid.org/0000-0003-0908-2180"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yipo Huang","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University &amp; Nanyang Technological University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University &amp; Nanyang Technological University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044278717","display_name":"Xiangfei Sheng","orcid":"https://orcid.org/0009-0004-8468-1970"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangfei Sheng","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101448608","display_name":"Zhichao Yang","orcid":"https://orcid.org/0009-0008-3398-1286"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhichao Yang","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104174422","display_name":"Quan Yuan","orcid":"https://orcid.org/0000-0001-5929-2026"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Yuan","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061798935","display_name":"Zhichao Duan","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhichao Duan","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063520709","display_name":"Pengfei Chen","orcid":"https://orcid.org/0000-0002-0509-3782"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengfei Chen","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033615240","display_name":"Leida Li","orcid":"https://orcid.org/0000-0001-9069-8796"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leida Li","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100403129","display_name":"Weisi Lin","orcid":"https://orcid.org/0000-0001-9866-1947"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Weisi Lin","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101549504","display_name":"Guangming Shi","orcid":"https://orcid.org/0000-0003-2179-3292"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Shi","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5016576297"],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":5.4843,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96936005,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5911","last_page":"5920"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7761918306350708},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.6865798234939575},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6627581119537354},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5834558010101318},{"id":"https://openalex.org/keywords/aesthetics","display_name":"Aesthetics","score":0.5052452683448792},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.49112460017204285},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.46942049264907837},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3871088922023773},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3348706364631653},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2805832624435425},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.21763664484024048},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.06287628412246704}],"concepts":[{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7761918306350708},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.6865798234939575},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6627581119537354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5834558010101318},{"id":"https://openalex.org/C107038049","wikidata":"https://www.wikidata.org/wiki/Q35986","display_name":"Aesthetics","level":1,"score":0.5052452683448792},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49112460017204285},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.46942049264907837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3871088922023773},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3348706364631653},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2805832624435425},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.21763664484024048},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.06287628412246704},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W1985748637","https://openalex.org/W2078807908","https://openalex.org/W2105187896","https://openalex.org/W2139037672","https://openalex.org/W2150707975","https://openalex.org/W2417288846","https://openalex.org/W2529088810","https://openalex.org/W2896348597","https://openalex.org/W2990299123","https://openalex.org/W2999905431","https://openalex.org/W3002992380","https://openalex.org/W3003957020","https://openalex.org/W3004268082","https://openalex.org/W3019413400","https://openalex.org/W3034776788","https://openalex.org/W3035595647","https://openalex.org/W3091249416","https://openalex.org/W3173683732","https://openalex.org/W3208678646","https://openalex.org/W4220744054","https://openalex.org/W4285606417","https://openalex.org/W4293518946","https://openalex.org/W4309933612","https://openalex.org/W4312327698","https://openalex.org/W4312353506","https://openalex.org/W4322576760","https://openalex.org/W4382567941","https://openalex.org/W4385486275","https://openalex.org/W4386065622","https://openalex.org/W4386076227","https://openalex.org/W4387969352","https://openalex.org/W4387969606","https://openalex.org/W4392902667","https://openalex.org/W4394862887"],"related_works":["https://openalex.org/W2381393187","https://openalex.org/W2332779545","https://openalex.org/W2385859805","https://openalex.org/W2530972254","https://openalex.org/W2358060160","https://openalex.org/W2035483685","https://openalex.org/W1969764885","https://openalex.org/W596947562","https://openalex.org/W2793937822","https://openalex.org/W2461480269"],"abstract_inverted_index":{"The":[0,20],"highly":[1],"abstract":[2],"nature":[3],"of":[4,22,36,137],"image":[5],"aesthetics":[6,37,65,74],"perception":[7,38,177],"(IAP)":[8],"poses":[9],"a":[10,48,78],"significant":[11],"challenge":[12],"for":[13,62],"current":[14],"multimodal":[15],"large":[16],"language":[17,91],"models":[18,172],"(MLLMs).":[19],"lack":[21],"human-annotated":[23],"multi-modality":[24,64,159],"aesthetic":[25,80,102,106,123,129,144,176],"data":[26],"further":[27,117],"exacerbates":[28],"this":[29],"dilemma,":[30],"resulting":[31],"in":[32],"MLLMs":[33,71,111],"falling":[34],"short":[35],"capabilities.":[39,145],"To":[40,108],"address":[41],"the":[42,60,122,127,148,153,169,180,184],"above":[43],"challenge,":[44],"we":[45,76,116,151],"first":[46],"introduce":[47],"comprehensively":[49],"annotated":[50],"Aesthetic":[51,160],"Multi-Modality":[52],"Instruction":[53],"Tuning":[54],"(AesMMIT)":[55],"dataset,":[56,132],"which":[57,93,135],"serves":[58],"as":[59],"footstone":[61],"building":[63],"foundation":[66,156],"models.":[67],"Specifically,":[68],"to":[69,104,120,141],"align":[70],"with":[72,83],"human":[73,89],"perception,":[75],"construct":[77],"corpus-rich":[79],"critique":[81],"database":[82],"21,904":[84],"diverse-sourced":[85],"images":[86],"and":[87,125,188],"88K":[88],"natural":[90],"feedbacks,":[92],"are":[94],"collected":[95],"via":[96],"progressive":[97],"questions,":[98],"ranging":[99],"from":[100],"coarse-grained":[101],"grades":[103],"fine-grained":[105],"descriptions.":[107],"ensure":[109],"that":[110,168],"can":[112],"handle":[113],"diverse":[114],"queries,":[115],"prompt":[118],"GPT":[119],"refine":[121],"critiques":[124],"assemble":[126],"large-scale":[128],"instruction":[130],"tuning":[131],"i.e.":[133],"AesMMIT,":[134],"consists":[136],"409K":[138],"multi-typed":[139],"instructions":[140],"activate":[142],"stronger":[143],"Based":[146],"on":[147],"AesMMIT":[149],"database,":[150],"fine-tune":[152],"open-sourced":[154],"general":[155],"models,":[157,162],"achieving":[158],"Expert":[161],"dubbed":[163],"AesExpert.":[164],"Extensive":[165],"experiments":[166],"demonstrate":[167],"proposed":[170],"AesExpert":[171],"deliver":[173],"significantly":[174],"better":[175],"performances":[178],"than":[179],"state-of-the-art":[181],"MLLMs,":[182],"including":[183],"most":[185],"advanced":[186],"GPT-4V":[187],"Gemini-Pro-Vision.":[189],"Project":[190],"Page:":[191],"https://yipoh.github.io/aes-expert/.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
