{"id":"https://openalex.org/W7134820656","doi":"https://doi.org/10.48550/arxiv.2603.08652","title":"CoCo: Code as CoT for Text-to-Image Preview and Rare Concept Generation","display_name":"CoCo: Code as CoT for Text-to-Image Preview and Rare Concept Generation","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134820656","doi":"https://doi.org/10.48550/arxiv.2603.08652"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128682860","display_name":"Haodong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Haodong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128642669","display_name":"Chunmei Qing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qing, Chunmei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128649288","display_name":"Huanyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Huanyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102634569","display_name":"Dongzhi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Dongzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101333210","display_name":"Yihang Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Yihang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123154718","display_name":"Hongbo Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Hongbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088816340","display_name":"Dingming Li","orcid":"https://orcid.org/0000-0001-9695-7496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dingming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128650373","display_name":"Yuhong Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Yuhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123925414","display_name":"Zepeng Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, ZePeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128644516","display_name":"Juanxi Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Juanxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128688278","display_name":"Yi Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128665448","display_name":"Siqi Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Siqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128659813","display_name":"Jingwei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jingwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5128682860"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.710099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.710099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1891999989748001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.007899999618530273,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.9527999758720398},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.679099977016449},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6209999918937683},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5645999908447266},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5230000019073486},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5167999863624573},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.4334999918937683},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.4113999903202057}],"concepts":[{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.9527999758720398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7979000210762024},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.679099977016449},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6209999918937683},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6115000247955322},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5645999908447266},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5230000019073486},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5167999863624573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4690999984741211},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.4334999918937683},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.4113999903202057},{"id":"https://openalex.org/C2779639559","wikidata":"https://www.wikidata.org/wiki/Q7661178","display_name":"Symbolic execution","level":3,"score":0.4050999879837036},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2786000072956085},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2578999996185303},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.25760000944137573},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08652","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08652","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.48211199045181274,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,97],"Unified":[3],"Multimodal":[4],"Models":[5],"(UMMs)":[6],"have":[7],"significantly":[8],"advanced":[9],"text-to-image":[10,197],"(T2I)":[11],"generation,":[12,169],"particularly":[13],"through":[14,113],"the":[15,34,61,87,91,119],"integration":[16],"of":[17,90,162],"Chain-of-Thought":[18],"(CoT)":[19],"reasoning.":[20],"However,":[21],"existing":[22],"CoT-based":[23],"T2I":[24],"methods":[25,175],"largely":[26],"rely":[27],"on":[28,152],"abstract":[29],"natural-language":[30],"planning,":[31],"which":[32,93],"lacks":[33],"precision":[35],"required":[36],"for":[37,73,192],"complex":[38],"spatial":[39],"layouts,":[40],"structured":[41,135,143,196],"visual":[42,148],"elements,":[43],"and":[44,69,146,155,165,188,195],"dense":[45],"textual":[46],"content.":[47],"In":[48],"this":[49,111,125],"work,":[50],"we":[51,128],"propose":[52],"CoCo":[53,80,159],"(Code-as-CoT),":[54],"a":[55,77,98,103,131],"code-driven":[56],"reasoning":[57,62,190],"framework":[58],"that":[59,85,158,182],"represents":[60],"process":[63],"as":[64],"executable":[65,83,183],"code,":[66],"enabling":[67],"explicit":[68],"verifiable":[70],"intermediate":[71],"planning":[72],"image":[74,115,137],"generation.":[75,198],"Given":[76],"text":[78],"prompt,":[79],"first":[81],"generates":[82],"code":[84,184,200],"specifies":[86],"structural":[88],"layout":[89],"scene,":[92],"is":[94,185,201],"then":[95],"executed":[96],"sandboxed":[99],"environment":[100],"to":[101,117,140],"render":[102],"deterministic":[104],"draft":[105,112,144],"image.":[106],"The":[107,199],"model":[108],"subsequently":[109],"refines":[110],"fine-grained":[114],"editing":[116],"produce":[118],"final":[120],"high-fidelity":[121],"result.":[122],"To":[123],"support":[124],"training":[126],"paradigm,":[127],"construct":[129],"CoCo-10K,":[130],"curated":[132],"dataset":[133],"containing":[134],"draft-final":[136],"pairs":[138],"designed":[139],"teach":[141],"both":[142],"construction":[145],"corrective":[147],"refinement.":[149],"Empirical":[150],"evaluations":[151],"StructT2IBench,":[153],"OneIG-Bench,":[154],"LongText-Bench":[156],"show":[157],"achieves":[160],"improvements":[161],"+68.83%,":[163],"+54.8%,":[164],"+41.23%":[166],"over":[167],"direct":[168],"while":[170],"also":[171],"outperforming":[172],"other":[173],"generation":[174],"empowered":[176],"by":[177],"CoT.":[178],"These":[179],"results":[180],"demonstrate":[181],"an":[186],"effective":[187],"reliable":[189],"paradigm":[191],"precise,":[193],"controllable,":[194],"available":[202],"at:":[203],"https://github.com/micky-li-hd/CoCo":[204]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
