{"id":"https://openalex.org/W7134850335","doi":"https://doi.org/10.48550/arxiv.2603.07119","title":"TIQA: Human-Aligned Perceptual Text Quality Assessment in Generated Images","display_name":"TIQA: Human-Aligned Perceptual Text Quality Assessment in Generated Images","publication_year":2026,"publication_date":"2026-03-07","ids":{"openalex":"https://openalex.org/W7134850335","doi":"https://doi.org/10.48550/arxiv.2603.07119"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128669251","display_name":"Kirill Koltsov","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Koltsov, Kirill","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128673021","display_name":"Aleksandr Gushchin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gushchin, Aleksandr","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128630655","display_name":"Dmitriy Vatolin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antsiferova, Anastasia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086393377","display_name":"Anastasia Antsiferova","orcid":"https://orcid.org/0000-0002-1272-5135"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vatolin, Dmitriy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5128669251"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.23579999804496765,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.23579999804496765,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.20900000631809235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.16459999978542328,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6674000024795532},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5275999903678894},{"id":"https://openalex.org/keywords/quality-assessment","display_name":"Quality assessment","score":0.5246999859809875},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4814999997615814},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4530999958515167},{"id":"https://openalex.org/keywords/text-generation","display_name":"Text generation","score":0.4478999972343445},{"id":"https://openalex.org/keywords/quality-score","display_name":"Quality Score","score":0.43050000071525574}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7577000260353088},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6674000024795532},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5275999903678894},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.5246999859809875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5169000029563904},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4530999958515167},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.4478999972343445},{"id":"https://openalex.org/C2779346075","wikidata":"https://www.wikidata.org/wiki/Q7268763","display_name":"Quality Score","level":3,"score":0.43050000071525574},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4302999973297119},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.42320001125335693},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.39430001378059387},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.3862999975681305},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.287200003862381},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26440000534057617},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25220000743865967}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07119","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07119","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07119","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"text-to-image":[1,197],"models":[2],"have":[3],"improved":[4],"global":[5],"realism,":[6],"but":[7],"text":[8,56,61,77,165,188],"rendering":[9],"remains":[10],"a":[11,44,49,121,191],"persistent":[12],"failure":[13],"mode:":[14],"images":[15,82,102],"may":[16],"look":[17],"convincing":[18],"overall,":[19],"yet":[20],"local":[21],"typography":[22],"often":[23],"contains":[24,75,99],"malformed":[25],"glyphs,":[26],"broken":[27],"strokes,":[28],"irregular":[29],"spacing,":[30],"and":[31,92,113,130,147,180],"other":[32],"artifacts":[33],"that":[34,47],"humans":[35],"heavily":[36],"penalize.":[37],"We":[38,117],"formulate":[39],"Text-in-Image":[40],"Quality":[41],"Assessment":[42],"(TIQA),":[43],"no-reference":[45],"task":[46],"estimates":[48],"human-aligned":[50],"perceptual":[51,187],"quality":[52,62,166,189],"score":[53],"for":[54,96,149,177,195],"detected":[55],"regions":[57],"while":[58],"disentangling":[59],"visual":[60],"from":[63,79,103],"semantic":[64],"correctness.":[65],"To":[66],"support":[67],"this":[68],"setting,":[69],"we":[70],"introduce":[71],"two":[72],"datasets.":[73],"TIQA-Crops":[74,146],"120k":[76],"crops":[78],"36k":[80],"AI-generated":[81,159],"produced":[83],"by":[84,171],"12":[85],"generators,":[86,106],"with":[87,110,124,138],"10k":[88],"mean-opinion-score":[89],"(MOS)":[90],"labels":[91,95],"110k":[93],"proxy":[94],"pretraining.":[97],"TIQA-Images":[98],"1,500":[100],"text-heavy":[101],"10":[104],"recent":[105],"including":[107],"proprietary":[108],"systems,":[109],"paired":[111],"overall-quality":[112],"text-quality":[114,150],"subjective":[115],"scores.":[116],"also":[118],"propose":[119],"ANTIQA,":[120],"lightweight":[122],"predictor":[123],"text-specific":[125],"inductive":[126],"biases.":[127],"Across":[128],"crop-level":[129],"image-level":[131],"evaluations,":[132],"ANTIQA":[133,162],"achieves":[134],"the":[135,164,168],"best":[136],"alignment":[137],"human":[139],"judgments,":[140],"reaching":[141],"PLCC/SROCC":[142],"of":[143,167],"0.942/0.935":[144],"on":[145,152],"0.842/0.837":[148],"MOS":[151,173],"unseen":[153],"generators":[154],"in":[155],"TIQA-Images.":[156],"In":[157],"best-of-5":[158],"image":[160,170],"ranking,":[161],"improves":[163],"selected":[169],"0.36":[172],"(14%),":[174],"demonstrating":[175],"utility":[176],"benchmarking,":[178],"filtering,":[179],"generation-time":[181],"selection.":[182],"Together,":[183],"these":[184],"findings":[185],"establish":[186],"as":[190],"distinct":[192],"evaluation":[193],"target":[194],"modern":[196],"generation.":[198]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2026-03-11T00:00:00"}
