{"id":"https://openalex.org/W7138089739","doi":"https://doi.org/10.48550/arxiv.2603.13547","title":"NumColor: Precise Numeric Color Control in Text-to-Image Generation","display_name":"NumColor: Precise Numeric Color Control in Text-to-Image Generation","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7138089739","doi":"https://doi.org/10.48550/arxiv.2603.13547"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13547","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13547","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13547","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002591376","display_name":"Muhammad Atif Butt","orcid":"https://orcid.org/0000-0001-9832-6487"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Butt, Muhammad Atif","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128646654","display_name":"Diego Hern\u00e1ndez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hernandez, Diego","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119833538","display_name":"Alexandra Gomez-Villa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gomez-Villa, Alexandra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749400","display_name":"Kai Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660347","display_name":"Javier Vazquez-Corral","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vazquez-Corral, Javier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129747632","display_name":"Joost Van De Weijer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Van De Weijer, Joost","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002591376"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7161999940872192,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7161999940872192,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.026799999177455902,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.013700000010430813,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.7307999730110168},{"id":"https://openalex.org/keywords/color-space","display_name":"Color space","score":0.6473000049591064},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5778999924659729},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4977000057697296},{"id":"https://openalex.org/keywords/color-depth","display_name":"Color depth","score":0.4918000102043152},{"id":"https://openalex.org/keywords/color-coding","display_name":"Color-coding","score":0.4869999885559082},{"id":"https://openalex.org/keywords/color-quantization","display_name":"Color quantization","score":0.43230000138282776},{"id":"https://openalex.org/keywords/icc-profile","display_name":"ICC profile","score":0.4237000048160553},{"id":"https://openalex.org/keywords/color-image","display_name":"Color image","score":0.42089998722076416}],"concepts":[{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.7307999730110168},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6474999785423279},{"id":"https://openalex.org/C2961294","wikidata":"https://www.wikidata.org/wiki/Q166863","display_name":"Color space","level":3,"score":0.6473000049591064},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6118999719619751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6115999817848206},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5778999924659729},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4977000057697296},{"id":"https://openalex.org/C91522604","wikidata":"https://www.wikidata.org/wiki/Q690110","display_name":"Color depth","level":5,"score":0.4918000102043152},{"id":"https://openalex.org/C104597421","wikidata":"https://www.wikidata.org/wiki/Q5148529","display_name":"Color-coding","level":2,"score":0.4869999885559082},{"id":"https://openalex.org/C173752661","wikidata":"https://www.wikidata.org/wiki/Q1396414","display_name":"Color quantization","level":5,"score":0.43230000138282776},{"id":"https://openalex.org/C95143428","wikidata":"https://www.wikidata.org/wiki/Q375296","display_name":"ICC profile","level":5,"score":0.4237000048160553},{"id":"https://openalex.org/C142616399","wikidata":"https://www.wikidata.org/wiki/Q5148604","display_name":"Color image","level":4,"score":0.42089998722076416},{"id":"https://openalex.org/C12043971","wikidata":"https://www.wikidata.org/wiki/Q2636542","display_name":"Color histogram","level":5,"score":0.41260001063346863},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.40939998626708984},{"id":"https://openalex.org/C36262787","wikidata":"https://www.wikidata.org/wiki/Q2294018","display_name":"Color model","level":4,"score":0.39480000734329224},{"id":"https://openalex.org/C84216515","wikidata":"https://www.wikidata.org/wiki/Q375677","display_name":"RGB color space","level":5,"score":0.3871999979019165},{"id":"https://openalex.org/C159784718","wikidata":"https://www.wikidata.org/wiki/Q182571","display_name":"Color balance","level":5,"score":0.37400001287460327},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32659998536109924},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C131910990","wikidata":"https://www.wikidata.org/wiki/Q1202284","display_name":"High color","level":5,"score":0.31679999828338623},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C44404184","wikidata":"https://www.wikidata.org/wiki/Q5148620","display_name":"Color normalization","level":5,"score":0.29190000891685486},{"id":"https://openalex.org/C2779255053","wikidata":"https://www.wikidata.org/wiki/Q370424","display_name":"Local color","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C36372059","wikidata":"https://www.wikidata.org/wiki/Q376492","display_name":"HSL and HSV","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C186991048","wikidata":"https://www.wikidata.org/wiki/Q1184883","display_name":"Color difference","level":3,"score":0.25760000944137573},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2524999976158142},{"id":"https://openalex.org/C2779495555","wikidata":"https://www.wikidata.org/wiki/Q5148596","display_name":"Color correction","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13547","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13547","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13547","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13547","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4977104961872101,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-image":[0],"diffusion":[1,60],"models":[2],"excel":[3],"at":[4],"generating":[5],"images":[6,135],"from":[7,29],"natural":[8],"language":[9],"descriptions,":[10],"yet":[11],"fail":[12],"to":[13,45,87,109,156],"interpret":[14],"numerical":[15,55,167],"colors":[16,86],"such":[17],"as":[18],"hex":[19],"codes":[20,35],"(#FF5733)":[21],"and":[22,77,106,115,160],"RGB":[23],"values":[24],"(rgb(255,87,51)).":[25],"This":[26],"limitation":[27],"stems":[28],"subword":[30],"tokenization,":[31,76],"which":[32],"fragments":[33],"color":[34,47,56,72,120,168,178],"into":[36],"semantically":[37],"meaningless":[38],"tokens":[39],"that":[40,52,70,84],"text":[41,91],"encoders":[42],"cannot":[43],"map":[44,85],"coherent":[46],"representations.":[48],"We":[49,99],"present":[50],"NumColor,":[51],"enables":[53],"precise":[54],"control":[57],"across":[58,172],"multiple":[59],"architectures.":[61],"NumColor":[62,153,165],"comprises":[63],"two":[64,101],"components:":[65],"a":[66,78,129],"Color":[67],"Token":[68],"Aggregator":[69],"detects":[71],"specifications":[73],"regardless":[74],"of":[75,90,132],"ColorBook":[79],"containing":[80],"6,707":[81],"learnable":[82],"embeddings":[83],"embedding":[88,116],"space":[89],"encoder":[92],"in":[93,145],"perceptually":[94],"uniform":[95],"CIE":[96],"Lab":[97,114],"space.":[98],"introduce":[100],"auxiliary":[102],"losses,":[103],"directional":[104],"alignment":[105],"interpolation":[107],"consistency,":[108],"enforce":[110],"geometric":[111],"correspondence":[112],"between":[113],"spaces,":[117],"enabling":[118],"smooth":[119],"interpolation.":[121],"To":[122],"train":[123],"the":[124,141],"ColorBook,":[125],"we":[126],"construct":[127],"NumColor-Data,":[128],"synthetic":[130],"dataset":[131],"500K":[133],"rendered":[134],"with":[136],"unambiguous":[137],"color-to-pixel":[138],"correspondence,":[139],"eliminating":[140],"annotation":[142],"ambiguity":[143],"inherent":[144],"photographic":[146],"datasets.":[147],"Although":[148],"trained":[149],"solely":[150],"on":[151,183],"FLUX,":[152],"transfers":[154],"zero-shot":[155],"SD3,":[157],"SD3.5,":[158],"PixArt-\u03b1,":[159],"PixArt-\u03a3":[161],"without":[162],"model-specific":[163],"adaptation.":[164],"improves":[166],"accuracy":[169],"by":[170,181],"4-9x":[171],"five":[173],"models,":[174],"while":[175],"simultaneously":[176],"improving":[177],"harmony":[179],"scores":[180],"10-30x":[182],"GenColorBench":[184],"benchmark.":[185]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-18T00:00:00"}
