{"id":"https://openalex.org/W7137915226","doi":"https://doi.org/10.1609/aaai.v40i11.37866","title":"Insert Anything: Image Insertion via In-Context Editing in DiT","display_name":"Insert Anything: Image Insertion via In-Context Editing in DiT","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137915226","doi":"https://doi.org/10.1609/aaai.v40i11.37866"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i11.37866","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37866","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i11.37866","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109776978","display_name":"Wensong Song","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wensong Song","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129684805","display_name":"Hong Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Jiang","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129708199","display_name":"Zongxin Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zongxin Yang","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124990309","display_name":"Zheqiao Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheqiao Cheng","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129710513","display_name":"Ruijie Quan","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ruijie Quan","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129693635","display_name":"Yi Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5109776978"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15298507,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"11","first_page":"9097","last_page":"9105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.009600000455975533,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.00419999985024333,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.6108999848365784},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5853000283241272},{"id":"https://openalex.org/keywords/image-manipulation","display_name":"Image manipulation","score":0.4553999900817871},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45320001244544983},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.39579999446868896},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.3652999997138977}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187999725341797},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6126999855041504},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.6108999848365784},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.589900016784668},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5853000283241272},{"id":"https://openalex.org/C2987933465","wikidata":"https://www.wikidata.org/wiki/Q141130","display_name":"Image manipulation","level":3,"score":0.4553999900817871},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45320001244544983},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.39579999446868896},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3652999997138977},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.33250001072883606},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2687999904155731},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i11.37866","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37866","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i11.37866","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37866","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"work":[1],"presents":[2],"Insert":[3],"Anything,":[4],"a":[5,74,81],"unified":[6],"framework":[7],"for":[8,32,52],"reference-based":[9],"image":[10,55,135],"insertion":[11,78],"that":[12,131,167],"seamlessly":[13],"integrates":[14],"objects":[15],"from":[16],"reference":[17,53,134],"images":[18],"into":[19],"target":[20,150],"scenes":[21],"under":[22],"flexible,":[23],"user-specified":[24],"control":[25],"guidance.":[26],"Instead":[27],"of":[28,77,112],"training":[29],"separate":[30],"models":[31],"individual":[33],"tasks,":[34],"our":[35,41,168],"approach":[36],"is":[37],"trained":[38],"once":[39],"on":[40,160],"new":[42],"AnyInsertion":[43],"dataset,":[44],"the":[45,109,113,133,145,149],"first":[46],"open-source":[47],"large-scale":[48],"dataset":[49],"specifically":[50],"designed":[51],"image\u2013based":[54],"editing,":[56],"comprising":[57],"136K":[58],"prompt-image":[59],"pairs":[60],"covering":[61],"diverse":[62],"tasks":[63],"such":[64,181],"as":[65,136,182],"person,":[66],"object,":[67],"and":[68,89,100,121,163,188],"garment":[69],"insertion--and":[70],"effortlessly":[71],"generalizes":[72],"to":[73,107,117,143],"wide":[75],"range":[76],"scenarios.":[79],"Such":[80],"challenging":[82],"setting":[83],"requires":[84],"capturing":[85],"both":[86,119],"identity":[87],"features":[88],"fine-grained":[90],"details,":[91],"while":[92,152],"allowing":[93],"versatile":[94],"local":[95],"adaptations":[96],"in":[97,178],"style,":[98],"color,":[99],"texture.":[101],"To":[102],"this":[103],"end,":[104],"we":[105,125],"propose":[106],"leverage":[108],"multimodal":[110],"attention":[111],"Diffusion":[114],"Transformer":[115],"(DiT)":[116],"support":[118],"mask-":[120],"text-guided":[122],"editing.":[123],"Furthermore,":[124],"introduce":[126],"an":[127],"in-context":[128],"editing":[129],"mechanism":[130],"treats":[132],"contextual":[137],"information,":[138],"employing":[139],"two":[140],"prompting":[141],"strategies":[142],"harmonize":[144],"inserted":[146],"elements":[147],"with":[148],"scene":[151,189],"faithfully":[153],"preserving":[154],"their":[155],"distinctive":[156],"features.":[157],"Extensive":[158],"experiments":[159],"AnyInsertion,":[161],"DreamBooth,":[162],"VTON-HD":[164],"benchmarks":[165],"demonstrate":[166],"method":[169],"consistently":[170],"outperforms":[171],"existing":[172],"alternatives,":[173],"underscoring":[174],"its":[175],"great":[176],"potential":[177],"real-world":[179],"applications":[180],"creative":[183],"content":[184],"generation,":[185],"virtual":[186],"try-on,":[187],"composition.":[190]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
