{"id":"https://openalex.org/W7160302078","doi":"https://doi.org/10.1109/wacv61042.2026.00358","title":"Autoregressive Styled Text Image Generation, but Make it Reliable","display_name":"Autoregressive Styled Text Image Generation, but Make it Reliable","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7160302078","doi":"https://doi.org/10.1109/wacv61042.2026.00358"},"language":null,"primary_location":{"id":"doi:10.1109/wacv61042.2026.00358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135387188","display_name":"Carmine Zaccagnino","orcid":null},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Carmine Zaccagnino","raw_affiliation_strings":["University of Modena and Reggio Emilia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia","institution_ids":["https://openalex.org/I122346577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065897902","display_name":"Fabio Quattrini","orcid":"https://orcid.org/0009-0004-3244-6186"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabio Quattrini","raw_affiliation_strings":["University of Modena and Reggio Emilia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia","institution_ids":["https://openalex.org/I122346577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040381049","display_name":"Vittorio Pippi","orcid":"https://orcid.org/0009-0001-7365-6348"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Vittorio Pippi","raw_affiliation_strings":["University of Modena and Reggio Emilia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia","institution_ids":["https://openalex.org/I122346577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041010671","display_name":"Silvia Cascianelli","orcid":"https://orcid.org/0000-0001-7885-6050"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Silvia Cascianelli","raw_affiliation_strings":["University of Modena and Reggio Emilia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia","institution_ids":["https://openalex.org/I122346577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017431855","display_name":"Alessio Tonioni","orcid":"https://orcid.org/0000-0003-3358-9686"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alessio Tonioni","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135357010","display_name":"Rita Cucchiara","orcid":null},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Rita Cucchiara","raw_affiliation_strings":["University of Modena and Reggio Emilia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Modena and Reggio Emilia","institution_ids":["https://openalex.org/I122346577"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68963455,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3668","last_page":"3678"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4101000130176544,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4101000130176544,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.056699998676776886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.053300000727176666,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.49709999561309814},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.48030000925064087},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.35580000281333923},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3474999964237213},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3346000015735626}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6029999852180481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5436999797821045},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.49709999561309814},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.48030000925064087},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43779999017715454},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.35580000281333923},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3346000015735626},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.304500013589859},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61042.2026.00358","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00358","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W2050995497","https://openalex.org/W2152928267","https://openalex.org/W2394605686","https://openalex.org/W2784512264","https://openalex.org/W2915977493","https://openalex.org/W3003967978","https://openalex.org/W3034726763","https://openalex.org/W3034904792","https://openalex.org/W3109645351","https://openalex.org/W3126649386","https://openalex.org/W3164045210","https://openalex.org/W3167911358","https://openalex.org/W3173630418","https://openalex.org/W3180355996","https://openalex.org/W3184543489","https://openalex.org/W3211060597","https://openalex.org/W4214732854","https://openalex.org/W4229453476","https://openalex.org/W4285999733","https://openalex.org/W4312933868","https://openalex.org/W4313021454","https://openalex.org/W4379740045","https://openalex.org/W4382202677","https://openalex.org/W4385990957","https://openalex.org/W4385991139","https://openalex.org/W4386065864","https://openalex.org/W4386076475","https://openalex.org/W4386083087","https://openalex.org/W4388999366","https://openalex.org/W4402343061","https://openalex.org/W4402959532","https://openalex.org/W4403422942","https://openalex.org/W4403942675","https://openalex.org/W4403990898","https://openalex.org/W4404024950","https://openalex.org/W4404725181","https://openalex.org/W4410773402","https://openalex.org/W4412357756","https://openalex.org/W4413145006","https://openalex.org/W4413145894","https://openalex.org/W4413147473","https://openalex.org/W7131061531","https://openalex.org/W7131104799"],"related_works":[],"abstract_inverted_index":{"Generating":[0],"faithful":[1],"and":[2,26,59,82,109,161],"readable":[3],"styled":[4],"text":[5],"images":[6],"(especially":[7],"for":[8,69,121,134],"Styled":[9],"Handwritten":[10],"Text":[11],"generation-HTG)":[12],"is":[13,37],"an":[14],"open":[15],"problem":[16],"with":[17,51,124],"several":[18],"possible":[19],"applications":[20],"across":[21],"graphic":[22],"design,":[23],"document":[24],"understanding,":[25],"image":[27],"editing.":[28],"A":[29],"lot":[30],"of":[31,47,56],"research":[32],"effort":[33],"in":[34,54,86],"this":[35,72,93],"task":[36],"dedicated":[38],"to":[39,150,158],"developing":[40],"strategies":[41],"that":[42,144],"reproduce":[43],"the":[44,63,97,111,125,165],"stylistic":[45],"characteristics":[46],"a":[48,78,104,131],"given":[49],"writer,":[50],"promising":[52],"results":[53],"terms":[55],"style":[57],"fidelity":[58],"generalization":[60],"achieved":[61],"by":[62,100,115],"recently":[64],"proposed":[65],"Autoregressive":[66],"Transformer":[67],"paradigm":[68],"HTG.":[70],"However,":[71],"method":[73],"requires":[74,153],"additional":[75],"inputs,":[76,155],"lacks":[77],"proper":[79],"stop":[80],"mechanism,":[81],"might":[83],"end":[84],"up":[85],"repetition":[87],"loops,":[88],"generating":[89],"visual":[90,126],"artifacts.":[91],"In":[92],"work,":[94],"we":[95,129,142],"rethink":[96],"autoregressive":[98,136],"formulation":[99],"framing":[101],"HTG":[102],"as":[103],"multimodal":[105],"prompt-conditioned":[106],"generation":[107],"task,":[108],"tackle":[110],"content":[112,169],"controllability":[113],"issues":[114],"introducing":[116],"special":[117],"textual":[118,166],"input":[119],"tokens":[120],"better":[122,157],"alignment":[123],"ones.":[127],"Moreover,":[128],"devise":[130],"Classifier-Free-Guidance-based":[132],"strategy":[133],"our":[135,145],"model.":[137],"Through":[138],"extensive":[139],"experimental":[140],"validation,":[141],"demonstrate":[143],"approach,":[146],"dubbed":[147],"Eruku,":[148],"compared":[149],"previous":[151],"solutions":[152],"fewer":[154],"generalizes":[156],"unseen":[159],"styles,":[160],"follows":[162],"more":[163],"faithfully":[164],"prompt,":[167],"improving":[168],"adherence.":[170]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
