{"id":"https://openalex.org/W7138278618","doi":"https://doi.org/10.1609/aaai.v40i6.42408","title":"ViType: High-Fidelity Visual Text Rendering via Glyph-Aware Multimodal Diffusion","display_name":"ViType: High-Fidelity Visual Text Rendering via Glyph-Aware Multimodal Diffusion","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138278618","doi":"https://doi.org/10.1609/aaai.v40i6.42408"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i6.42408","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42408","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i6.42408","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129659377","display_name":"Lishuai Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lishuai Gao","raw_affiliation_strings":["Tianjin University of Technology\nMeituan"],"affiliations":[{"raw_affiliation_string":"Tianjin University of Technology\nMeituan","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687960","display_name":"Jun-Yan He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun-Yan He","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072520602","display_name":"Yingsen Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yingsen Zeng","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129656865","display_name":"Yujie Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yujie Zhong","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129648986","display_name":"Xiaopeng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaopeng Sun","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129662942","display_name":"Jie Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Hu","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048947531","display_name":"Zan Gao","orcid":"https://orcid.org/0000-0003-2182-5741"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zan Gao","raw_affiliation_strings":["Tianjin University of Technology"],"affiliations":[{"raw_affiliation_string":"Tianjin University of Technology","institution_ids":["https://openalex.org/I136765683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129720343","display_name":"Xiaoming Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoming Wei","raw_affiliation_strings":["Meituan"],"affiliations":[{"raw_affiliation_string":"Meituan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129659377"],"corresponding_institution_ids":["https://openalex.org/I136765683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63059701,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"6","first_page":"4131","last_page":"4139"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7059999704360962,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7059999704360962,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07530000060796738,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.04149999842047691,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/glyph","display_name":"Glyph (data visualization)","score":0.5246000289916992},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.51910001039505},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.46790000796318054},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4586000144481659},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4007999897003174},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.3725999891757965},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3337000012397766},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.32519999146461487}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8202999830245972},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5929999947547913},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.5246000289916992},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.51910001039505},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5085999965667725},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4007999897003174},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3725999891757965},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.3156999945640564},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.302700012922287},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2833999991416931},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i6.42408","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42408","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i6.42408","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i6.42408","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5773506760597229,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"text-to-image":[1,61],"models":[2],"face":[3],"challenges":[4],"in":[5],"visual":[6,110,119,148,177],"text":[7,9,83,120,128,149],"rendering:":[8,121],"encoders":[10],"like":[11],"CLIP":[12],"and":[13,18,30,44,97,117,140,145,176],"T5":[14],"lack":[15],"glyph-level":[16],"understanding":[17,80],"often":[19],"struggle":[20],"to":[21,27,77,107,173],"distinguish":[22],"between":[23,40,91],"the":[24,41,49,59,65,82,92,95,104,113,131,143,171,190,195],"specific":[25],"words":[26],"be":[28],"rendered":[29],"their":[31,98],"intended":[32],"semantic":[33,127],"meaning":[34],"within":[35],"prompts.":[36],"In":[37,54],"addition,":[38],"inconsistencies":[39],"base":[42],"model":[43,172],"its":[45],"plugins":[46],"further":[47],"compromise":[48],"quality":[50,183],"of":[51,94,112,147],"synthesized":[52],"images.":[53],"this":[55],"paper,":[56],"we":[57],"enhance":[58],"existing":[60,196],"method":[62],"by":[63],"addressing":[64],"following":[66],"aspects:":[67],"(1)":[68],"Text-Glyph":[69],"Alignmentin":[70],"a":[71,156],"Visual":[72],"Question":[73],"Answering":[74],"(VQA)":[75],"manner":[76],"enable":[78],"glyph":[79],"for":[81],"encoder.":[84],"This":[85],"involves":[86],"establishing":[87],"an":[88],"explicit":[89],"alignment":[90,139],"representations":[93],"glyphs":[96],"detailed":[99],"attribute":[100],"descriptions,":[101],"which":[102],"boosts":[103],"model's":[105],"ability":[106],"capture":[108],"fine-grained":[109],"features":[111],"text.":[114],"(2)":[115],"Accurate":[116],"harmony":[118],"integrating":[122],"pre-aligned":[123],"glyph-visual":[124],"embeddings":[125],"with":[126],"tokens":[129],"through":[130],"Multimodal":[132],"Diffusion":[133],"Transformer(MMDiT)":[134],"synchronously,":[135],"ensuring":[136],"coherent":[137],"feature":[138],"enhancing":[141],"both":[142],"robustness":[144],"fidelity":[146],"rendering.":[150],"(3)":[151],"Image":[152],"Aesthetic":[153],"Refinement:":[154],"leveraging":[155],"multisource":[157],"data":[158],"training":[159],"strategy":[160],"that":[161,189],"incorporates":[162],"diverse,":[163],"high-quality":[164],"image-text":[165],"pairs":[166],"from":[167],"various":[168],"domains,":[169],"exposing":[170],"extensive":[174],"linguistic":[175],"diversity":[178],"while":[179],"maintaining":[180],"superior":[181],"aesthetic":[182],"throughout":[184],"training.":[185],"Our":[186],"experiments":[187],"demonstrate":[188],"proposed":[191],"approach":[192],"significantly":[193],"outperforms":[194],"state-of-the-art":[197],"method.":[198]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
