{"id":"https://openalex.org/W7137906030","doi":"https://doi.org/10.1609/aaai.v40i4.37311","title":"LAMIC: Layout-Aware Multi-Image Composition via Scalability of Multimodal Diffusion Transformer","display_name":"LAMIC: Layout-Aware Multi-Image Composition via Scalability of Multimodal Diffusion Transformer","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137906030","doi":"https://doi.org/10.1609/aaai.v40i4.37311"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i4.37311","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37311","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i4.37311","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129744486","display_name":"Yuzhuo Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuzhuo Chen","raw_affiliation_strings":["Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036060690","display_name":"Zehua Ma","orcid":"https://orcid.org/0000-0002-8153-341X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehua Ma","raw_affiliation_strings":["Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653717","display_name":"Jianhua Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I2800608841","display_name":"Lowestoft Hospital","ror":"https://ror.org/01fsf9j67","country_code":"GB","type":"healthcare","lineage":["https://openalex.org/I2800608841","https://openalex.org/I4210135926"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jianhua Wang","raw_affiliation_strings":["Onestory Team, Chuangyi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Onestory Team, Chuangyi","institution_ids":["https://openalex.org/I2800608841"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129663863","display_name":"Kai Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Kang","raw_affiliation_strings":["East China Normal University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129661830","display_name":"Shunyu Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I2800608841","display_name":"Lowestoft Hospital","ror":"https://ror.org/01fsf9j67","country_code":"GB","type":"healthcare","lineage":["https://openalex.org/I2800608841","https://openalex.org/I4210135926"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shunyu Yao","raw_affiliation_strings":["Onestory Team, Chuangyi"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Onestory Team, Chuangyi","institution_ids":["https://openalex.org/I2800608841"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129739985","display_name":"Weiming Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiming Zhang","raw_affiliation_strings":["Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Anhui Province Key Laboratory of Digital Security, University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129744486"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12660944,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"4","first_page":"3174","last_page":"3182"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.005200000014156103,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6452000141143799},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4449000060558319},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41280001401901245},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.36959999799728394},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.36910000443458557},{"id":"https://openalex.org/keywords/composition","display_name":"Composition (language)","score":0.32659998536109924}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.733299970626831},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6452000141143799},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41280001401901245},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41040000319480896},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3783999979496002},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3328000009059906},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.32659998536109924},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3197999894618988},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.31850001215934753},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2635999917984009},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.2605000138282776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i4.37311","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37311","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i4.37311","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37311","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7030766010284424,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"controllable":[1,190],"image":[2],"synthesis,":[3],"generating":[4],"coherent":[5],"and":[6,64,87,95,126,132,154,176],"consistent":[7],"images":[8],"from":[9],"multiple":[10],"references":[11],"with":[12],"spatial":[13],"layout":[14,93,152],"awareness":[15],"remains":[16],"an":[17],"open":[18],"challenge.":[19],"We":[20],"propose":[21],"LAMIC,":[22],"a":[23,41,185],"Layout-Aware":[24],"Multi-Image":[25],"Composition":[26],"framework":[27],"that,":[28],"for":[29,91,100,189],"the":[30,46,134,170],"first":[31],"time,":[32],"extends":[33],"single-reference":[34,174],"diffusion":[35],"models":[36,175,195],"to":[37,60,69,180,197,203],"multi-reference":[38,120],"scenarios":[39],"in":[40,122,137,147],"training-free":[42,187],"manner.":[43],"Built":[44],"upon":[45],"MMDiT":[47],"model,":[48],"LAMIC":[49,108,183],"introduces":[50],"two":[51],"plug-and-play":[52],"attention":[53],"mechanisms:":[54],"1)":[55,83],"Group":[56],"Isolation":[57],"Attention":[58,67],"(GIA)":[59],"enhance":[61],"entity":[62],"disentanglement;":[63],"2)":[65,96],"Region-Modulated":[66],"(RMA)":[68],"enable":[70],"layout-aware":[71],"generation.":[72],"To":[73],"comprehensively":[74],"evaluate":[75],"model":[76],"capabilities,":[77],"we":[78],"further":[79],"introduce":[80],"three":[81],"metrics:":[82,115],"Inclusion":[84],"Ratio":[85,89],"(IN-R)":[86],"Fill":[88],"(FI-R)":[90],"assessing":[92],"control;":[94],"Background":[97],"Similarity":[98],"(BG-S)":[99],"measuring":[101],"background":[102,150],"consistency.":[103],"Extensive":[104],"experiments":[105],"show":[106],"that":[107],"achieves":[109,133],"state-of-the-art":[110],"performance":[111,200],"across":[112,129],"most":[113],"major":[114],"it":[116],"consistently":[117],"outperforms":[118],"existing":[119],"baselines":[121],"ID-S,":[123],"BG-S,":[124],"IN-R":[125],"AVG":[127],"scores":[128],"all":[130,156],"settings,":[131],"best":[135],"DPG":[136],"complex":[138],"composition":[139],"tasks.":[140],"These":[141],"results":[142],"demonstrate":[143],"LAMIC's":[144,199],"superior":[145],"abilities":[146],"identity":[148],"keeping,":[149],"preservation,":[151],"control,":[153],"prompt-following,":[155],"achieved":[157],"without":[158],"any":[159],"training":[160],"or":[161],"fine-tuning,":[162],"showcasing":[163],"strong":[164],"zero-shot":[165],"generalization":[166],"ability.":[167],"By":[168],"inheriting":[169],"strengths":[171],"of":[172],"advanced":[173],"enabling":[177],"seamless":[178],"extension":[179],"multi-image":[181,191],"scenarios,":[182],"establishes":[184],"new":[186],"paradigm":[188],"composition.":[192],"As":[193],"foundation":[194],"continue":[196],"evolve,":[198],"is":[201],"expected":[202],"scale":[204],"accordingly.":[205]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
