{"id":"https://openalex.org/W7161166458","doi":"https://doi.org/10.48550/arxiv.2605.12684","title":"Visual Aesthetic Benchmark: Can Frontier Models Judge Beauty?","display_name":"Visual Aesthetic Benchmark: Can Frontier Models Judge Beauty?","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161166458","doi":"https://doi.org/10.48550/arxiv.2605.12684"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12684","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12684","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12684","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124891379","display_name":"Yichen Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Yichen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136177247","display_name":"Yuetai Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuetai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136149202","display_name":"Chunjiang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chunjiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136176839","display_name":"Yuanyuan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuanyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007846132","display_name":"Fengqing Jiang","orcid":"https://orcid.org/0009-0002-9077-2399"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Fengqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136133785","display_name":"Yue Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136132553","display_name":"Hang Hua","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hua, Hang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136160412","display_name":"Zhengqing Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Zhengqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136124914","display_name":"Kaiyuan Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Kaiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018806127","display_name":"Luyao Niu","orcid":"https://orcid.org/0000-0001-8591-5522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niu, Luyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052064870","display_name":"Bhaskar Ramasubramanian","orcid":"https://orcid.org/0000-0002-2166-7838"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramasubramanian, Bhaskar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136140104","display_name":"Basel Alomair","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alomair, Basel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136185209","display_name":"Xiangliang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiangliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029380651","display_name":"Misha Sra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sra, Misha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104078124","display_name":"Zichen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zichen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079723268","display_name":"Radha Poovendran","orcid":"https://orcid.org/0000-0003-0269-8097"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Poovendran, Radha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028990387","display_name":"Zhangchen Xu","orcid":"https://orcid.org/0000-0002-6971-412X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhangchen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.44040000438690186,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.44040000438690186,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.21709999442100525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1655000001192093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5479000210762024},{"id":"https://openalex.org/keywords/ask-price","display_name":"Ask price","score":0.503600001335144},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.49950000643730164},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.49000000953674316},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.3903999924659729},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.33489999175071716},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.32339999079704285},{"id":"https://openalex.org/keywords/human-visual-system-model","display_name":"Human visual system model","score":0.3163999915122986}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6003999710083008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5856000185012817},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.503600001335144},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.49950000643730164},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.49000000953674316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43849998712539673},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.3903999924659729},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.3163999915122986},{"id":"https://openalex.org/C7220189","wikidata":"https://www.wikidata.org/wiki/Q52827","display_name":"Pictogram","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29330000281333923},{"id":"https://openalex.org/C2779525943","wikidata":"https://www.wikidata.org/wiki/Q1187300","display_name":"Grammaticality","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.25699999928474426},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12684","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12684","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12684","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12684","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"large":[1],"language":[2],"models":[3,219],"(MLLMs)":[4],"are":[5],"now":[6],"routinely":[7],"deployed":[8],"for":[9,37],"visual":[10],"understanding,":[11],"generation,":[12],"and":[13,79,111,118,137,153,213,220,224,239],"curation.":[14],"A":[15],"substantial":[16],"fraction":[17],"of":[18,126,162,169,193],"these":[19,208],"applications":[20],"require":[21],"an":[22],"explicit":[23],"aesthetic":[24,95,222],"judgment.":[25],"Most":[26],"existing":[27],"solutions":[28],"reduce":[29],"this":[30,84],"judgment":[31],"to":[32,191],"predicting":[33],"a":[34,38,52,180,194,211],"scalar":[35],"score":[36],"single":[39],"image.":[40],"We":[41],"first":[42,228],"ask":[43],"whether":[44],"such":[45],"scores":[46],"faithfully":[47],"capture":[48],"comparative":[49,98,201],"preference:":[50],"in":[51,166,203],"controlled":[53],"study":[54],"with":[55,63,103,120],"eight":[56],"expert":[57,129,185,221],"annotators,":[58],"score-derived":[59],"rankings":[60],"align":[61],"poorly":[62],"the":[64,88,124,146,151,154,163,173,200,227],"same":[65],"annotators'":[66],"direct":[67,70],"comparisons,":[68],"while":[69],"ranking":[71],"yields":[72],"substantially":[73],"higher":[74],"inter-annotator":[75],"agreement":[76],"on":[77,183,232],"best-":[78],"worst-image":[80],"labels.":[81],"Motivated":[82],"by":[83,176],"finding,":[85],"we":[86,143],"introduce":[87],"Visual":[89],"Aesthetic":[90],"Benchmark":[91],"(VAB),":[92],"which":[93,233],"casts":[94],"evaluation":[96],"as":[97],"selection":[99],"over":[100],"candidate":[101,164],"sets":[102],"matched":[104],"subject":[105],"matter.":[106],"VAB":[107,204,225],"contains":[108],"400":[109],"tasks":[110],"1,195":[112],"images":[113],"across":[114,158],"fine":[115],"art,":[116],"photography,":[117],"illustration,":[119],"labels":[121],"derived":[122],"from":[123],"consensus":[125],"10":[127],"independent":[128],"judges":[130],"per":[131],"task.":[132],"Evaluating":[133],"20":[134],"frontier":[135],"MLLMs":[136],"six":[138],"dedicated":[139],"visual-quality":[140],"reward":[141],"models,":[142],"find":[144],"that":[145,192,199,234],"strongest":[147],"system":[148],"identifies":[149],"both":[150],"best":[152],"worst":[155],"image":[156],"correctly":[157],"three":[159],"random":[160],"permutations":[161],"order":[165],"only":[167],"26.5%":[168],"tasks,":[170],"far":[171],"below":[172],"68.9%":[174],"achieved":[175],"human":[177],"experts.":[178],"Fine-tuning":[179],"35B-parameter":[181],"model":[182],"2,000":[184],"examples":[186],"brings":[187],"its":[188],"accuracy":[189],"close":[190],"397B-parameter":[195],"open-weight":[196],"model,":[197],"suggesting":[198],"signal":[202],"is":[205],"transferable.":[206],"Together,":[207],"results":[209],"expose":[210],"clear":[212],"measurable":[214],"gap":[215,235],"between":[216],"current":[217],"multimodal":[218],"judgment,":[223],"provides":[226],"set-based,":[229],"expert-grounded":[230],"testbed":[231],"can":[236],"be":[237],"tracked":[238],"closed.":[240]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
