{"id":"https://openalex.org/W7155064422","doi":"https://doi.org/10.48550/arxiv.2604.18168","title":"Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation","display_name":"Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7155064422","doi":"https://doi.org/10.48550/arxiv.2604.18168"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18168","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18168","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18168","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074584116","display_name":"Chenxi Zhao","orcid":"https://orcid.org/0000-0001-7166-2755"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Chenxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134206368","display_name":"Chen Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134144149","display_name":"Xiaokun Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Xiaokun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110806029","display_name":"Aiming Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Aiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017725417","display_name":"Jiashu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jiashu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059032251","display_name":"Jiachen Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Jiachen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134132707","display_name":"Jiahong Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jiahong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134167907","display_name":"Xiangxiang Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Xiangxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134164799","display_name":"Jufeng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jufeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4422000050544739,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4422000050544739,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08330000191926956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.043800000101327896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.751800000667572},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6674000024795532},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6376000046730042},{"id":"https://openalex.org/keywords/text-generation","display_name":"Text generation","score":0.5408999919891357},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5383999943733215},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5254999995231628},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4636000096797943},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44290000200271606},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.43320000171661377}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7835000157356262},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.751800000667572},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6674000024795532},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6376000046730042},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.5408999919891357},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5386999845504761},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5383999943733215},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5254999995231628},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4636000096797943},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4449000060558319},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44290000200271606},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.43320000171661377},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.41350001096725464},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3197000026702881},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3091999888420105},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18168","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18168","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18168","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18168","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7473618388175964}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Few-step":[0],"generation":[1,10,180,207],"has":[2],"been":[3],"a":[4,164,215],"long-standing":[5],"goal,":[6],"with":[7],"recent":[8],"one-step":[9],"methods":[11],"exemplified":[12],"by":[13,159],"MeanFlow":[14,21,76,124,156,179,225],"achieving":[15],"remarkable":[16],"results.":[17],"Existing":[18],"research":[19,222],"on":[20,24,199,223],"primarily":[22],"focuses":[23],"class-to-image":[25],"generation.":[26,226],"However,":[27],"an":[28],"intuitive":[29],"yet":[30],"unexplored":[31],"direction":[32],"is":[33,229],"to":[34,42,51,61,114,137,170,182],"extend":[35],"the":[36,52,62,67,75,103,115,123,131,155,172,178,191,200],"condition":[37],"from":[38],"fixed":[39],"class":[40,54,150],"labels":[41],"flexible":[43],"text":[44,56,72,81,91,132,167],"inputs,":[45],"enabling":[46],"richer":[47],"content":[48],"creation.":[49],"Compared":[50],"limited":[53,117],"labels,":[55],"conditions":[57,82],"pose":[58],"greater":[59],"challenges":[60],"model's":[63],"understanding":[64],"capability,":[65],"necessitating":[66],"effective":[68],"integration":[69],"of":[70,119],"powerful":[71,89,165],"encoders":[73,92],"into":[74],"framework.":[77,157],"Surprisingly,":[78],"although":[79],"incorporating":[80],"appears":[83],"straightforward,":[84],"we":[85,106,162,195],"find":[86],"that":[87],"integrating":[88],"LLM-based":[90,166],"using":[93],"conventional":[94],"training":[95],"strategies":[96],"results":[97],"in":[98,122,186],"unsatisfactory":[99],"performance.":[100],"To":[101],"uncover":[102],"underlying":[104],"cause,":[105],"conduct":[107],"detailed":[108],"analyses":[109],"and":[110,147,176,217],"reveal":[111],"that,":[112],"due":[113],"extremely":[116],"number":[118],"refinement":[120],"steps":[121],"generation,":[125],"such":[126],"as":[127],"only":[128],"one":[129],"step,":[130],"feature":[133],"representations":[134],"are":[135],"required":[136,173],"possess":[138,171],"sufficiently":[139],"high":[140],"discriminability.":[141],"This":[142],"also":[143],"explains":[144],"why":[145],"discrete":[146],"easily":[148],"distinguishable":[149],"features":[151],"perform":[152],"well":[153],"within":[154],"Guided":[158],"these":[160],"insights,":[161],"leverage":[163],"encoder":[168],"validated":[169],"semantic":[174],"properties":[175],"adapt":[177],"process":[181],"this":[183,212],"framework,":[184],"resulting":[185],"efficient":[187],"text-conditioned":[188,224],"synthesis":[189],"for":[190,220],"first":[192],"time.":[193],"Furthermore,":[194],"validate":[196],"our":[197],"approach":[198],"widely":[201],"used":[202],"diffusion":[203],"model,":[204],"demonstrating":[205],"significant":[206],"performance":[208],"improvements.":[209],"We":[210],"hope":[211],"work":[213],"provides":[214],"general":[216],"practical":[218],"reference":[219],"future":[221],"The":[227],"code":[228],"available":[230],"at":[231],"https://github.com/AMAP-ML/EMF.":[232]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-22T00:00:00"}
