{"id":"https://openalex.org/W4415538863","doi":"https://doi.org/10.1145/3746027.3754543","title":"Position-LoRA: Enhanced Relation Customization through Structural Prior in Initial Latent Noise","display_name":"Position-LoRA: Enhanced Relation Customization through Structural Prior in Initial Latent Noise","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538863","doi":"https://doi.org/10.1145/3746027.3754543"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754543","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754543","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100587789","display_name":"Yiming Li","orcid":"https://orcid.org/0009-0001-7862-840X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiming Li","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-7862-840X","affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101505982","display_name":"Peng Zhou","orcid":"https://orcid.org/0000-0002-0674-9296"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Zhou","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-0674-9296","affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaokang Qin","orcid":"https://orcid.org/0009-0009-7635-9101"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaokang Qin","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-7635-9101","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hongwei Hu","orcid":"https://orcid.org/0009-0006-7662-5850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongwei Hu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0006-7662-5850","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100507310","display_name":"Jun Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Sun","raw_affiliation_strings":["Shanghai Key Lab of Digital Media Processing and Transmission, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-1956-694X","affiliations":[{"raw_affiliation_string":"Shanghai Key Lab of Digital Media Processing and Transmission, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000921151","display_name":"Yi Xu","orcid":"https://orcid.org/0000-0001-6508-4469"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Xu","raw_affiliation_strings":["MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-6508-4469","affiliations":[{"raw_affiliation_string":"MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100587789"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37029642,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9247","last_page":"9256"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.6697999835014343},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6104000210762024},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.6015999913215637},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.6008999943733215},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.5622000098228455},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5181999802589417},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5101000070571899},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4896000027656555},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4327999949455261}],"concepts":[{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.6697999835014343},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.652400016784668},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6104000210762024},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.6015999913215637},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.6008999943733215},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.5622000098228455},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5181999802589417},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5101000070571899},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4896000027656555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4555000066757202},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3716000020503998},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3601999878883362},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.32659998536109924},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2888999879360199},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C2780909371","wikidata":"https://www.wikidata.org/wiki/Q4801092","display_name":"Artificial noise","level":4,"score":0.2542000114917755},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754543","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754543","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6360183275","display_name":null,"funder_award_id":"22DZ2229005","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G7976557715","display_name":null,"funder_award_id":"62171282","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W3216352822","https://openalex.org/W4312740349","https://openalex.org/W4312933868","https://openalex.org/W4312980231","https://openalex.org/W4319300158","https://openalex.org/W4385271281","https://openalex.org/W4386072096","https://openalex.org/W4386076027","https://openalex.org/W4386076425","https://openalex.org/W4386076532","https://openalex.org/W4389334940","https://openalex.org/W4390872671","https://openalex.org/W4390873054","https://openalex.org/W4390873260","https://openalex.org/W4400819434","https://openalex.org/W4403791578","https://openalex.org/W4404965624"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,62,112,144,158],"concept":[3,153],"customization":[4,68,154],"via":[5],"diffusion":[6],"models":[7],"have":[8],"significantly":[9],"enhanced":[10],"controllability":[11],"and":[12,42,69,78,110,120,132,142,155,160],"quality.":[13],"However,":[14],"precise":[15,107],"relation":[16,67],"customization,":[17],"which":[18],"controls":[19],"the":[20,46,129],"position":[21],"of":[22,49],"interactions":[23],"among":[24],"multiple":[25],"instances,":[26],"remains":[27],"challenging":[28],"due":[29],"to":[30,65,127],"unpredictable":[31],"initial":[32,50,63,91],"latent":[33,80,99,104],"noise.":[34,51],"Existing":[35],"methods":[36,157],"primarily":[37],"rely":[38],"on":[39],"conditional":[40],"prompts":[41],"attention":[43],"control,":[44,150],"overlooking":[45],"structured":[47,90],"potential":[48],"This":[52],"paper":[53],"introduces":[54],"Position-LoRA,":[55],"a":[56,74,79,117],"novel":[57],"framework":[58],"leveraging":[59],"structural":[60],"prior":[61],"noise":[64,81,100],"improve":[66],"layout":[70,133,149],"control.":[71],"Position-LoRA":[72,138],"employs":[73],"differential":[75],"fine-tuning":[76,85],"scheme":[77],"encoder.":[82],"The":[83,98],"guided":[84],"enhances":[86],"generation":[87,126,147],"tendencies":[88],"from":[89],"noise,":[92],"embedding":[93],"explicit":[94],"relationship-specific":[95],"spatial":[96,108],"information.":[97],"encoder":[101],"dynamically":[102],"manipulates":[103],"noises,":[105],"enabling":[106],"control":[109,121],"flexibility":[111],"relational":[113,145],"image":[114,146],"generation.":[115],"Furthermore,":[116],"fine-grained":[118],"guidance":[119],"strategy":[122],"is":[123,164],"employed":[124],"during":[125],"enhance":[128],"image-text":[130],"alignment":[131],"alignment.":[134],"Experiments":[135],"demonstrate":[136],"that":[137],"improves":[139],"stability,":[140],"controllability,":[141],"fidelity":[143],"with":[148],"surpassing":[151],"existing":[152],"layout-to-image":[156],"qualitative":[159],"quantitative":[161],"evaluations.":[162],"Code":[163],"available":[165],"at":[166],"https://github.com/liyiming09/Position-LoRA.":[167]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
