{"id":"https://openalex.org/W4415332610","doi":"https://doi.org/10.48550/arxiv.2509.23624","title":"DiffInk: Glyph- and Style-Aware Latent Diffusion Transformer for Text to Online Handwriting Generation","display_name":"DiffInk: Glyph- and Style-Aware Latent Diffusion Transformer for Text to Online Handwriting Generation","publication_year":2025,"publication_date":"2025-09-28","ids":{"openalex":"https://openalex.org/W4415332610","doi":"https://doi.org/10.48550/arxiv.2509.23624"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.23624","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.23624","pdf_url":"https://arxiv.org/pdf/2509.23624","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.23624","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083084806","display_name":"Wei Pan","orcid":"https://orcid.org/0000-0002-2720-3073"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pan, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045716717","display_name":"Huiguo He","orcid":"https://orcid.org/0000-0003-1419-059X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Huiguo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102547914","display_name":"Hiuyi Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Hiuyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033120372","display_name":"Yao Shi","orcid":"https://orcid.org/0000-0003-0068-4804"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yilin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080674767","display_name":"Lianwen Jin","orcid":"https://orcid.org/0000-0002-5456-0957"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Lianwen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5083084806"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7245000004768372},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7092000246047974},{"id":"https://openalex.org/keywords/handwriting","display_name":"Handwriting","score":0.5730000138282776},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5278000235557556},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5273000001907349},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4392000138759613},{"id":"https://openalex.org/keywords/glyph","display_name":"Glyph (data visualization)","score":0.42750000953674316},{"id":"https://openalex.org/keywords/handwriting-recognition","display_name":"Handwriting recognition","score":0.4140999913215637},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.37540000677108765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7275000214576721},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7245000004768372},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7092000246047974},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.574999988079071},{"id":"https://openalex.org/C2779386606","wikidata":"https://www.wikidata.org/wiki/Q2393642","display_name":"Handwriting","level":2,"score":0.5730000138282776},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5273000001907349},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4392000138759613},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43549999594688416},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.42750000953674316},{"id":"https://openalex.org/C112640561","wikidata":"https://www.wikidata.org/wiki/Q2440634","display_name":"Handwriting recognition","level":3,"score":0.4140999913215637},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36640000343322754},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.36169999837875366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.361299991607666},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.3206999897956848},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C44868376","wikidata":"https://www.wikidata.org/wiki/Q3099089","display_name":"Intelligent character recognition","level":4,"score":0.27129998803138733},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.23624","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.23624","pdf_url":"https://arxiv.org/pdf/2509.23624","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.23624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.23624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.23624","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.23624","pdf_url":"https://arxiv.org/pdf/2509.23624","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"generative":[1],"models":[2],"have":[3],"advanced":[4],"text-to-online":[5],"handwriting":[6,66],"generation":[7,158],"(TOHG),":[8],"which":[9],"aims":[10],"to":[11,47,133],"synthesize":[12],"realistic":[13],"pen":[14,136],"trajectories":[15],"conditioned":[16],"on":[17,30],"textual":[18],"input":[19],"and":[20,38,91,111,130,152],"style":[21,153],"references.":[22],"However,":[23],"most":[24],"existing":[25,144],"methods":[26,147],"still":[27],"primarily":[28],"focus":[29],"character-":[31],"or":[32],"word-level":[33],"generation,":[34],"resulting":[35],"in":[36,148],"inefficiency":[37],"a":[39,72,93,103,121],"lack":[40],"of":[41],"holistic":[42],"structural":[43],"modeling":[44],"when":[45],"applied":[46],"full":[48],"text":[49,129],"lines.":[50],"To":[51],"address":[52],"these":[53],"issues,":[54],"we":[55],"propose":[56],"DiffInk,":[57],"the":[58],"first":[59,69],"latent":[60,106,123],"diffusion":[61,124],"Transformer":[62,125],"framework":[63],"for":[64],"full-line":[65],"generation.":[67],"We":[68,117],"introduce":[70,119],"InkVAE,":[71],"novel":[73,122],"sequential":[74],"variational":[75],"autoencoder":[76],"enhanced":[77],"with":[78],"two":[79],"complementary":[80],"latent-space":[81],"regularization":[82,101],"losses:":[83],"(1)":[84],"an":[85],"OCR-based":[86],"loss":[87,95],"enforcing":[88],"glyph-level":[89],"accuracy,":[90],"(2)":[92],"style-classification":[94],"preserving":[96],"writing":[97],"style.":[98],"This":[99],"dual":[100],"yields":[102],"semantically":[104],"structured":[105],"space":[107],"where":[108],"character":[109],"content":[110],"writer":[112],"styles":[113,132],"are":[114],"effectively":[115],"disentangled.":[116],"then":[118],"InkDiT,":[120],"that":[126,141],"integrates":[127],"target":[128],"reference":[131],"generate":[134],"coherent":[135],"trajectories.":[137],"Experimental":[138],"results":[139],"demonstrate":[140],"DiffInk":[142],"outperforms":[143],"state-of-the-art":[145],"(SOTA)":[146],"both":[149],"glyph":[150],"accuracy":[151],"fidelity,":[154],"while":[155],"significantly":[156],"improving":[157],"efficiency.":[159]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-19T00:00:00"}
