{"id":"https://openalex.org/W7133358306","doi":"https://doi.org/10.48550/arxiv.2603.01083","title":"Can Vision Language Models Assess Graphic Design Aesthetics? A Benchmark, Evaluation, and Dataset Perspective","display_name":"Can Vision Language Models Assess Graphic Design Aesthetics? A Benchmark, Evaluation, and Dataset Perspective","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7133358306","doi":"https://doi.org/10.48550/arxiv.2603.01083"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01083","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088308455","display_name":"A.X. An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Arctanx","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124936614","display_name":"Shizhao Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Shizhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061321029","display_name":"Danqing Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Danqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057367919","display_name":"Mingxi Cheng","orcid":"https://orcid.org/0000-0002-8070-6665"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Mingxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127940954","display_name":"Yan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128003288","display_name":"Ji Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127892833","display_name":"Yu Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127948903","display_name":"Jiang Bian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bian, Jiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.3287000060081482,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.3287000060081482,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.21619999408721924,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.18080000579357147,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.7222999930381775},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5990999937057495},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5546000003814697},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5343999862670898},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5013999938964844},{"id":"https://openalex.org/keywords/visual-language","display_name":"Visual language","score":0.45730000734329224},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44369998574256897},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4284000098705292}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7235999703407288},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.7222999930381775},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5990999937057495},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5546000003814697},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5343999862670898},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5013999938964844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4578999876976013},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.45730000734329224},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44369998574256897},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4284000098705292},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42399999499320984},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4187000095844269},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.39010000228881836},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.37779998779296875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37459999322891235},{"id":"https://openalex.org/C15724806","wikidata":"https://www.wikidata.org/wiki/Q185925","display_name":"Graphic design","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C41022531","wikidata":"https://www.wikidata.org/wiki/Q333657","display_name":"Communication design","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3497999906539917},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33230000734329224},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27549999952316284},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.262800008058548},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01083","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01083","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4975726902484894,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Assessing":[0],"the":[1,101,143],"aesthetic":[2,80,105,148],"quality":[3,149],"of":[4,50,104],"graphic":[5,152],"design":[6,26,138],"is":[7],"central":[8],"to":[9,31,41,113,123,132,136],"visual":[10],"communication,":[11],"yet":[12],"remains":[13],"underexplored":[14],"in":[15,28,151],"vision":[16],"language":[17],"models":[18],"(VLMs).":[19],"We":[20],"investigate":[21],"whether":[22],"VLMs":[23,115],"can":[24],"evaluate":[25,90],"aesthetics":[27],"ways":[29],"comparable":[30],"humans.":[32],"Prior":[33],"work":[34,141],"faces":[35],"three":[36,76],"key":[37],"limitations:":[38],"benchmarks":[39],"restricted":[40],"narrow":[42],"principles":[43],"and":[44,54,75,84,93,129,156],"coarse":[45],"evaluation":[46],"protocols,":[47],"a":[48,67,110],"lack":[49],"systematic":[51,145],"VLM":[52,121],"comparisons,":[53],"limited":[55],"training":[56,111],"data":[57],"for":[58,116,147],"model":[59],"improvement.":[60],"In":[61],"this":[62,117],"work,":[63],"we":[64,88,108],"introduce":[65],"AesEval-Bench,":[66],"comprehensive":[68],"benchmark":[69],"spanning":[70],"four":[71],"dimensions,":[72],"twelve":[73],"indicators,":[74],"fully":[77],"quantifiable":[78],"tasks:":[79],"judgment,":[81],"region":[82],"selection,":[83],"precise":[85],"localization.":[86],"Then,":[87],"systematically":[89],"proprietary,":[91],"open-source,":[92],"reasoning-augmented":[94],"VLMs,":[95],"revealing":[96],"clear":[97],"performance":[98],"gaps":[99],"against":[100],"nuanced":[102],"demands":[103],"assessment.":[106],"Moreover,":[107],"construct":[109],"dataset":[112,157],"fine-tune":[114],"domain,":[118],"leveraging":[119],"human-guided":[120],"labeling":[122],"produce":[124],"task":[125],"labels":[126],"at":[127],"scale":[128],"indicator-grounded":[130],"reasoning":[131],"tie":[133],"abstract":[134],"indicators":[135],"concrete":[137],"regions.Together,":[139],"our":[140],"establishes":[142],"first":[144],"framework":[146],"assessment":[150],"design.":[153],"Our":[154],"code":[155],"will":[158],"be":[159],"released":[160],"at:":[161],"\\href{https://github.com/arctanxarc/AesEval-Bench}{https://github.com/arctanxarc/AesEval-Bench}":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
