{"id":"https://openalex.org/W7140205376","doi":"https://doi.org/10.48550/arxiv.2603.20644","title":"ScaleEdit-12M: Scaling Open-Source Image Editing Data Generation via Multi-Agent Framework","display_name":"ScaleEdit-12M: Scaling Open-Source Image Editing Data Generation via Multi-Agent Framework","publication_year":2026,"publication_date":"2026-03-21","ids":{"openalex":"https://openalex.org/W7140205376","doi":"https://doi.org/10.48550/arxiv.2603.20644"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20644","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20644","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20644","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chen, Guanzhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Guanzhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cui, Erfei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Erfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tian, Changyao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Changyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yang, Danni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Danni","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yang, Ganlin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ganlin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Qiao, Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Li, Hongsheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongsheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Luo, Gen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Gen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhang, Hongjie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hongjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4959000051021576,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4959000051021576,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11050000041723251,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.04430000111460686,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.9307000041007996},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7682999968528748},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6517999768257141},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5914000272750854},{"id":"https://openalex.org/keywords/video-editing","display_name":"Video editing","score":0.4754999876022339},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.454800009727478},{"id":"https://openalex.org/keywords/post-production","display_name":"Post-production","score":0.4438000023365021},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4433000087738037}],"concepts":[{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.9307000041007996},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7886000275611877},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7682999968528748},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6517999768257141},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5914000272750854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4844000041484833},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.4754999876022339},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.454800009727478},{"id":"https://openalex.org/C87829876","wikidata":"https://www.wikidata.org/wiki/Q648877","display_name":"Post-production","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4433000087738037},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4000999927520752},{"id":"https://openalex.org/C2780967703","wikidata":"https://www.wikidata.org/wiki/Q2571389","display_name":"Collaborative editing","level":2,"score":0.3781000077724457},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.36250001192092896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3580999970436096},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34880000352859497},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.295199990272522},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2727999985218048},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2517000138759613},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20644","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20644","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20644","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20644","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.47511377930641174,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Instruction-based":[0],"image":[1,29,75,86,111],"editing":[2,20,30,47,76,93,112,149],"has":[3],"emerged":[4],"as":[5],"a":[6,63,97],"key":[7,83],"capability":[8],"for":[9,37,69,147,162],"unified":[10],"multimodal":[11],"models":[12,36],"(UMMs),":[13],"yet":[14],"constructing":[15],"large-scale,":[16,73],"diverse,":[17],"and":[18,54,96,123,128,143,151,158,180,185],"high-quality":[19,74],"datasets":[21,31],"without":[22],"costly":[23],"proprietary":[24],"APIs":[25],"remains":[26],"challenging.":[27],"Previous":[28],"either":[32],"rely":[33],"on":[34,130,141,145,156,160],"closed-source":[35],"annotation,":[38],"which":[39,49],"prevents":[40],"cost-effective":[41],"scaling,":[42],"or":[43],"employ":[44],"fixed":[45],"synthetic":[46,124],"pipelines,":[48],"suffer":[50],"from":[51],"limited":[52],"quality":[53,100,176],"generalizability.":[55],"To":[56],"address":[57],"these":[58],"challenges,":[59],"we":[60,105],"propose":[61],"ScaleEditor,":[62,104],"fully":[64],"open-source":[65,110],"hierarchical":[66],"multi-agent":[67,92],"framework":[68,184],"end-to-end":[70],"construction":[71],"of":[72,81],"datasets.":[77],"Our":[78],"pipeline":[79],"consists":[80],"three":[82],"components:":[84],"source":[85],"expansion":[87],"with":[88],"world-knowledge":[89],"infusion,":[90],"adaptive":[91],"instruction-image":[94],"synthesis,":[95],"task-aware":[98],"data":[99,175],"verification":[101],"mechanism.":[102],"Using":[103],"curate":[106],"ScaleEdit-12M,":[107],"the":[108,183],"largest":[109],"dataset":[113,186],"to":[114,139,154],"date,":[115],"spanning":[116],"23":[117],"task":[118],"families":[119],"across":[120],"diverse":[121],"real":[122],"domains.":[125],"Fine-tuning":[126],"UniWorld-V1":[127],"Bagel":[129],"ScaleEdit":[131],"yields":[132],"consistent":[133],"gains,":[134],"improving":[135],"performance":[136],"by":[137,152],"up":[138,153],"10.4%":[140],"ImgEdit":[142],"35.1%":[144],"GEdit":[146],"general":[148],"benchmarks":[150],"150.0%":[155],"RISE":[157],"26.5%":[159],"KRIS-Bench":[161],"knowledge-infused":[163],"benchmarks.":[164],"These":[165],"results":[166],"demonstrate":[167],"that":[168],"open-source,":[169],"agentic":[170],"pipelines":[171],"can":[172],"approach":[173],"commercial-grade":[174],"while":[177],"retaining":[178],"cost-effectiveness":[179],"scalability.":[181],"Both":[182],"will":[187],"be":[188],"open-sourced.":[189]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
