{"id":"https://openalex.org/W7138290439","doi":"https://doi.org/10.1609/aaai.v40i6.42458","title":"EvalMuse-40K: A Fine-Grained Benchmark with Comprehensive Human Annotations for Text-to-Image Generation Model Alignment Evaluation","display_name":"EvalMuse-40K: A Fine-Grained Benchmark with Comprehensive Human Annotations for Text-to-Image Generation Model Alignment Evaluation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138290439","doi":"https://doi.org/10.1609/aaai.v40i6.42458"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i6.42458","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42458","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/42458/46419","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/42458/46419","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129726381","display_name":"Shuhao Han","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuhao Han","raw_affiliation_strings":["Nankai University\nByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nankai University\nByteDance Inc","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129737337","display_name":"Haotian Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haotian Fan","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742742","display_name":"Jiachen Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiachen Fu","raw_affiliation_strings":["Nankai University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129671399","display_name":"Liang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang Li","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129715777","display_name":"Tao E. Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Li","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129680257","display_name":"Junhui Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junhui Cui","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129698447","display_name":"Yunqiu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunqiu Wang","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129724529","display_name":"Yang Tai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Tai","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129647879","display_name":"Jingwei Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingwei Sun","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129741939","display_name":"Chun-Le Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun-Le Guo","raw_affiliation_strings":["Nankai University\nNKIARI, Shenzhen Futian"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nankai University\nNKIARI, Shenzhen Futian","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129708426","display_name":"Chongyi Li","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chongyi Li","raw_affiliation_strings":["Nankai University\nNKIARI, Shenzhen Futian"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nankai University\nNKIARI, Shenzhen Futian","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5129726381"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":12.8349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.96924177,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"40","issue":"6","first_page":"4583","last_page":"4591"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.41929998993873596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.41929998993873596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.26570001244544983,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.04729999974370003,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5855000019073486},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5482000112533569},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.39800000190734863},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.3634999990463257},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.34049999713897705},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3384999930858612},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3253999948501587}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8327000141143799},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5855000019073486},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5482000112533569},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4878000020980835},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48500001430511475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.450300008058548},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.39800000190734863},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.34049999713897705},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3384999930858612},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3253999948501587},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.32190001010894775},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C3018395757","wikidata":"https://www.wikidata.org/wiki/Q1379672","display_name":"Evaluation methods","level":2,"score":0.2992999851703644},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C3019060180","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automated method","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.27469998598098755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i6.42458","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42458","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/42458/46419","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i6.42458","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42458","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/42458/46419","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6785956370","display_name":null,"funder_award_id":"62225604","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323993","display_name":"Natural Science Foundation of Tianjin City","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138290439.pdf","grobid_xml":"https://content.openalex.org/works/W7138290439.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-Image":[0],"(T2I)":[1],"generation":[2],"models":[3],"have":[4],"achieved":[5],"significant":[6],"advancements.":[7],"Correspondingly,":[8],"many":[9],"automated":[10,28,50,124],"methods":[11,29,51,112],"emerge":[12],"to":[13,45,92,104,184],"evaluate":[14,106],"the":[15,23,33,43,47,77,94,107,153,168,191],"image-text":[16,66,73,110,162,192],"alignment":[17,111,163,193],"capabilities":[18],"of":[19,36,49,98,109,175,195],"generative":[20],"models.":[21,115,197],"However,":[22],"performance":[24,48,108,159],"comparison":[25],"among":[26],"these":[27],"is":[30],"constrained":[31],"by":[32,135],"limited":[34],"scale":[35],"existing":[37,40],"datasets.":[38,164],"Additionally,":[39],"datasets":[41],"lack":[42],"capacity":[44],"assess":[46],"at":[52],"a":[53,181],"fine-grained":[54,69,148,172],"level.":[55],"In":[56,76],"this":[57,118],"study,":[58],"we":[59,80,120,177],"contribute":[60],"an":[61,122],"EvalMuse-40K":[62],"dataset,":[63,119],"gathering":[64],"40K":[65],"pairs":[67],"with":[68],"human":[70],"annotations":[71],"for":[72,113,146],"alignment-related":[74],"tasks.":[75],"construction":[78],"process,":[79],"employ":[81],"various":[82],"strategies":[83],"such":[84],"as":[85,180],"balanced":[86],"prompt":[87],"sampling":[88],"and":[89,96,138,171],"data":[90],"re-annotation":[91],"ensure":[93],"diversity":[95],"reliability":[97],"our":[99],"dataset.":[100],"This":[101],"allows":[102],"us":[103],"comprehensively":[105],"T2I":[114],"Based":[116],"on":[117,160],"introduce":[121],"efficient":[123],"evaluation":[125,133,173],"method":[126],"termed":[127],"FGA-BLIP2,":[128,176],"which":[129,188],"enables":[130],"Fine-Grained":[131],"Alignment":[132],"solely":[134],"inputting":[136],"images":[137],"text":[139],"leveraging":[140],"BLIP2,":[141],"without":[142],"visual":[143],"question":[144],"answering":[145],"each":[147],"element.":[149],"Experimental":[150],"results":[151],"show":[152],"proposed":[154],"FGA-BLIP2":[155],"efficiently":[156],"achieves":[157],"good":[158],"multiple":[161],"Meanwhile,":[165],"benefiting":[166],"from":[167],"high":[169],"efficiency":[170],"capability":[174],"apply":[178],"it":[179],"reward":[182],"model":[183],"improve":[185],"text-to-image":[186,196],"models,":[187],"effectively":[189],"enhances":[190],"ability":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
