{"id":"https://openalex.org/W7141258663","doi":"https://doi.org/10.48550/arxiv.2603.25319","title":"MACRO: Advancing Multi-Reference Image Generation with Structured Long-Context Data","display_name":"MACRO: Advancing Multi-Reference Image Generation with Structured Long-Context Data","publication_year":2026,"publication_date":"2026-03-26","ids":{"openalex":"https://openalex.org/W7141258663","doi":"https://doi.org/10.48550/arxiv.2603.25319"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.25319","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25319","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.25319","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104201235","display_name":"Zhekai Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Zhekai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130756860","display_name":"Yuqing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130732718","display_name":"Manyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Manyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130757577","display_name":"Xihui Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xihui","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5104201235"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8464999794960022,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8464999794960022,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10400000214576721,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.004900000058114529,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7914999723434448},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6129000186920166},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4968000054359436},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4586000144481659},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.32899999618530273},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32600000500679016},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.311599999666214}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7914999723434448},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.732699990272522},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6129000186920166},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5130000114440918},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4968000054359436},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.492900013923645},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44749999046325684},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32600000500679016},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.311599999666214},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.30640000104904175},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C171078966","wikidata":"https://www.wikidata.org/wiki/Q111029","display_name":"Root (linguistics)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25850000977516174}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.25319","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25319","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.25319","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25319","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generating":[0],"images":[1],"conditioned":[2],"on":[3,144],"multiple":[4],"visual":[5],"references":[6,35],"is":[7],"critical":[8],"for":[9,165],"real-world":[10],"applications":[11],"such":[12],"as":[13,30,42],"multi-subject":[14],"composition,":[15],"narrative":[16],"illustration,":[17],"and":[18,56,98,136,152,162,171],"novel":[19],"view":[20],"synthesis,":[21],"yet":[22],"current":[23],"models":[24],"suffer":[25],"from":[26],"severe":[27],"performance":[28],"degradation":[29],"the":[31,39,58,107,112],"number":[32],"of":[33,77,106,115,125,159],"input":[34,137],"grows.":[36],"We":[37],"identify":[38],"root":[40],"cause":[41],"a":[43,74,123],"fundamental":[44],"data":[45],"bottleneck:":[46],"existing":[47],"datasets":[48],"are":[49],"dominated":[50],"by":[51],"single-":[52],"or":[53],"few-reference":[54],"pairs":[55],"lack":[57],"structured,":[59],"long-context":[60,167],"supervision":[61],"needed":[62],"to":[63,83,102],"learn":[64],"dense":[65],"inter-reference":[66],"dependencies.":[67],"To":[68],"address":[69],"this,":[70],"we":[71,119],"introduce":[72],"MacroData,":[73],"large-scale":[75],"dataset":[76,170],"400K":[78],"samples,":[79],"each":[80],"containing":[81],"up":[82],"10":[84],"reference":[85],"images,":[86],"systematically":[87],"organized":[88],"across":[89,132],"four":[90],"complementary":[91],"dimensions":[92,135],"--":[93,101],"Customization,":[94],"Illustration,":[95],"Spatial":[96],"reasoning,":[97],"Temporal":[99],"dynamics":[100],"provide":[103],"comprehensive":[104],"coverage":[105],"multi-reference":[108,150],"generation":[109],"space.":[110],"Recognizing":[111],"concurrent":[113],"absence":[114],"standardized":[116],"evaluation":[117],"protocols,":[118],"further":[120,155],"propose":[121],"MacroBench,":[122],"benchmark":[124,172],"4,000":[126],"samples":[127],"that":[128,142],"assesses":[129],"generative":[130],"coherence":[131],"graded":[133],"task":[134],"scales.":[138],"Extensive":[139],"experiments":[140],"show":[141],"fine-tuning":[143],"MacroData":[145],"yields":[146],"substantial":[147],"improvements":[148],"in":[149],"generation,":[151],"ablation":[153],"studies":[154],"reveal":[156],"synergistic":[157],"benefits":[158],"cross-task":[160],"co-training":[161],"effective":[163],"strategies":[164],"handling":[166],"complexity.":[168],"The":[169],"will":[173],"be":[174],"publicly":[175],"released.":[176]},"counts_by_year":[],"updated_date":"2026-03-28T06:16:51.555046","created_date":"2026-03-28T00:00:00"}
