{"id":"https://openalex.org/W7154360707","doi":"https://doi.org/10.48550/arxiv.2604.10127","title":"VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation","display_name":"VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation","publication_year":2026,"publication_date":"2026-04-11","ids":{"openalex":"https://openalex.org/W7154360707","doi":"https://doi.org/10.48550/arxiv.2604.10127"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10127","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114094160","display_name":"Longteng Jiang","orcid":"https://orcid.org/0009-0007-5434-9573"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jiang, Longteng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133579800","display_name":"DanDan Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, DanDan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067179732","display_name":"Qianqian Qiao","orcid":"https://orcid.org/0009-0009-7298-4137"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Qianqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133605699","display_name":"Heng Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Heng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133569070","display_name":"Huaye Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Huaye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059812401","display_name":"Yihang Bo","orcid":"https://orcid.org/0009-0005-0060-6621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bo, Yihang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133607382","display_name":"Bao Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Bao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133616008","display_name":"Jingdong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jingdong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133625285","display_name":"Jun Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133619306","display_name":"Xin Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Xin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5114094160"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.8399999737739563,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.8399999737739563,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.048900000751018524,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.029500000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7994999885559082},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.652400016784668},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6197999715805054},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5641000270843506},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.45829999446868896},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.37709999084472656}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7994999885559082},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.736299991607666},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.652400016784668},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6197999715805054},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5641000270843506},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46369999647140503},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3993000090122223},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3499000072479248},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3158000111579895},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3066999912261963},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C54525549","wikidata":"https://www.wikidata.org/wiki/Q2553445","display_name":"Weaving","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C2992829110","wikidata":"https://www.wikidata.org/wiki/Q3921615","display_name":"First generation","level":3,"score":0.2854999899864197},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.25220000743865967},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10127","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10127","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"advancement":[2],"of":[3,63,109,136],"AIGC-based":[4],"video":[5,64,115],"generation":[6,20,65,116,162],"has":[7],"underscored":[8],"the":[9,137],"critical":[10],"need":[11],"for":[12,60,150,155,161],"comprehensive":[13],"evaluation":[14,62],"frameworks":[15],"that":[16,170],"go":[17],"beyond":[18],"traditional":[19],"quality":[21,66,152,165],"metrics":[22],"to":[23,45,91,191],"encompass":[24],"aesthetic":[25,68,151,157],"appeal.":[26],"However,":[27],"existing":[28],"benchmarks":[29],"remain":[30],"largely":[31],"focused":[32],"on":[33],"technical":[34],"fidelity,":[35],"leaving":[36],"a":[37,57,74,106,134,188],"significant":[38],"gap":[39],"in":[40,194,199],"holistic":[41],"assessment-particularly":[42],"with":[43,176,197],"respect":[44],"perceptual":[46],"and":[47,67,82,104,124,129,142,159,163,182,204],"artistic":[48],"qualities.":[49],"To":[50,126],"address":[51],"this":[52,97],"limitation,":[53],"we":[54,99,132],"introduce":[55],"VGA-Bench,":[56],"unified":[58],"benchmark":[59,190],"joint":[61],"quality.":[69],"VGA-Bench":[70,186],"is":[71],"built":[72],"upon":[73],"principled":[75],"three-tier":[76],"taxonomy:":[77],"Aesthetic":[78,80],"Quality,":[79,84],"Tagging,":[81],"Generation":[83],"each":[85],"decomposed":[86],"into":[87],"multiple":[88],"fine-grained":[89],"sub-dimensions":[90],"enable":[92,127],"systematic":[93],"assessment.":[94],"Guided":[95],"by":[96],"taxonomy,":[98],"design":[100],"1,016":[101],"diverse":[102],"prompts":[103],"generate":[105],"large-scale":[107],"dataset":[108,138],"over":[110],"60,000":[111],"videos":[112],"using":[113],"12":[114],"models,":[117],"ensuring":[118],"broad":[119],"coverage":[120],"across":[121],"content,":[122],"style,":[123],"artifacts.":[125],"scalable":[128],"automated":[130],"evaluation,":[131,196],"annotate":[133],"subset":[135],"via":[139],"human":[140,177],"labeling":[141],"develop":[143],"three":[144],"dedicated":[145],"multi-task":[146],"neural":[147],"assessors:":[148],"VAQA-Net":[149],"prediction,":[153],"VTag-Net":[154],"automatic":[156],"tagging,":[158],"VGQA-Net":[160],"basic":[164],"attributes.":[166],"Extensive":[167],"experiments":[168],"demonstrate":[169],"our":[171],"models":[172],"achieve":[173],"reliable":[174],"alignment":[175],"judgments,":[178],"offering":[179],"both":[180],"accuracy":[181],"efficiency.":[183],"We":[184],"release":[185],"as":[187],"public":[189],"foster":[192],"research":[193],"AIGC":[195],"applications":[198],"content":[200],"moderation,":[201],"model":[202,206],"debugging,":[203],"generative":[205],"optimization.":[207]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
