{"id":"https://openalex.org/W7105183145","doi":"https://doi.org/10.48550/arxiv.2511.05616","title":"Personalized Image Editing in Text-to-Image Diffusion Models via Collaborative Direct Preference Optimization","display_name":"Personalized Image Editing in Text-to-Image Diffusion Models via Collaborative Direct Preference Optimization","publication_year":2025,"publication_date":"2025-11-06","ids":{"openalex":"https://openalex.org/W7105183145","doi":"https://doi.org/10.48550/arxiv.2511.05616"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.05616","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.05616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.05616","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dunlop, Connor","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dunlop, Connor","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zheng, Matthew","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Matthew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Venkatesh, Kavana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Venkatesh, Kavana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Yanardag, Pinar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanardag, Pinar","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3312999904155731,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3312999904155731,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.10899999737739563,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07810000330209732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5712000131607056},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5565999746322632},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.5238999724388123},{"id":"https://openalex.org/keywords/collaborative-filtering","display_name":"Collaborative filtering","score":0.41179999709129333},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.37130001187324524},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.3695000112056732}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8230999708175659},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5712000131607056},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.5238999724388123},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4332999885082245},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4251999855041504},{"id":"https://openalex.org/C21569690","wikidata":"https://www.wikidata.org/wiki/Q94702","display_name":"Collaborative filtering","level":3,"score":0.41179999709129333},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3862000107765198},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.37130001187324524},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.3695000112056732},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.35019999742507935},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.05616","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.05616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.05616","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.05616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-image":[0],"(T2I)":[1],"diffusion":[2,46,105],"models":[3,18],"have":[4],"made":[5],"remarkable":[6],"strides":[7],"in":[8,45,79,143],"generating":[9,144],"and":[10,84,125,133],"editing":[11,44,107],"high-fidelity":[12],"images":[13],"from":[14,68],"text.":[15],"Yet,":[16],"these":[17,111],"remain":[19],"fundamentally":[20],"generic,":[21],"failing":[22],"to":[23,25],"adapt":[24],"the":[26,38],"nuanced":[27],"aesthetic":[28],"preferences":[29,63],"of":[30],"individual":[31,123],"users.":[32],"In":[33],"this":[34],"work,":[35],"we":[36],"present":[37],"first":[39],"framework":[40],"for":[41,122],"personalized":[42,112],"image":[43,59],"models,":[47],"introducing":[48],"Collaborative":[49],"Direct":[50],"Preference":[51],"Optimization":[52],"(C-DPO),":[53],"a":[54,77,80,88,104,115],"novel":[55,116],"method":[56,139],"that":[57,137,146],"aligns":[58],"edits":[60,145],"with":[61,98,149],"user-specific":[62],"while":[64],"leveraging":[65],"collaborative":[66],"signals":[67],"like-minded":[69],"individuals.":[70],"Our":[71],"approach":[72],"encodes":[73],"each":[74],"user":[75,131,150],"as":[76],"node":[78],"dynamic":[81],"preference":[82],"graph":[83,90],"learns":[85],"embeddings":[86,113],"via":[87],"lightweight":[89],"neural":[91],"network,":[92],"enabling":[93],"information":[94],"sharing":[95],"across":[96],"users":[97],"overlapping":[99],"visual":[100],"tastes.":[101],"We":[102],"enhance":[103],"model's":[106],"capabilities":[108],"by":[109],"integrating":[110],"into":[114],"DPO":[117],"objective,":[118],"which":[119],"jointly":[120],"optimizes":[121],"alignment":[124],"neighborhood":[126],"coherence.":[127],"Comprehensive":[128],"experiments,":[129],"including":[130],"studies":[132],"quantitative":[134],"benchmarks,":[135],"demonstrate":[136],"our":[138],"consistently":[140],"outperforms":[141],"baselines":[142],"are":[147],"aligned":[148],"preferences.":[151]},"counts_by_year":[],"updated_date":"2025-11-12T23:15:19.534421","created_date":"2025-11-12T00:00:00"}
