{"id":"https://openalex.org/W4415540312","doi":"https://doi.org/10.1145/3746027.3755220","title":"Inversion-DPO: Precise and Efficient Post-Training for Diffusion Models","display_name":"Inversion-DPO: Precise and Efficient Post-Training for Diffusion Models","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540312","doi":"https://doi.org/10.1145/3746027.3755220"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015625724","display_name":"Zejian Li","orcid":"https://orcid.org/0000-0001-5313-2742"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zejian Li","raw_affiliation_strings":["Zhejiang University, Ningbo, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Ningbo, China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030597811","display_name":"Yize Li","orcid":"https://orcid.org/0009-0006-6774-6319"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yize Li","raw_affiliation_strings":["Zhejiang University, Ningbo, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Ningbo, China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112978959","display_name":"Chenye Meng","orcid":"https://orcid.org/0000-0002-4787-6232"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenye Meng","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhongni Liu","orcid":"https://orcid.org/0009-0007-8449-1802"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongni Liu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022659405","display_name":"L. Yang","orcid":"https://orcid.org/0000-0003-1905-8053"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ling Yang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101950006","display_name":"Shengyuan Zhang","orcid":"https://orcid.org/0000-0003-3762-1612"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengyuan Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guang Yang","orcid":"https://orcid.org/0000-0001-8061-742X"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Yang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101627882","display_name":"Changyuan Yang","orcid":"https://orcid.org/0000-0003-0065-6272"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changyuan Yang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101043583","display_name":"Zhiyuan Yang","orcid":"https://orcid.org/0009-0005-4950-671X"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Yang","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036387698","display_name":"Lingyun Sun","orcid":"https://orcid.org/0000-0002-5561-0493"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingyun Sun","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5015625724"],"corresponding_institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29443921,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9901","last_page":"9910"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5289000272750854},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5027999877929688},{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4194999933242798},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4009999930858612},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.37209999561309814},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.36730000376701355},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.3621000051498413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7592999935150146},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5059000253677368},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4593999981880188},{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.42590001225471497},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42239999771118164},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4194999933242798},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4009999930858612},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.37209999561309814},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.36730000376701355},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3472000062465668},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.34049999713897705},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2522999942302704},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2517000138759613},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755220","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2013035813","https://openalex.org/W2277195237","https://openalex.org/W2963184176","https://openalex.org/W2987919422","https://openalex.org/W4312740349","https://openalex.org/W4312933868","https://openalex.org/W4385270985","https://openalex.org/W4386071814","https://openalex.org/W4387968089","https://openalex.org/W4390873319","https://openalex.org/W4393159641","https://openalex.org/W4401201684","https://openalex.org/W4402753770","https://openalex.org/W4402753790","https://openalex.org/W4402816868","https://openalex.org/W4403780587","https://openalex.org/W4403780831","https://openalex.org/W4403791248","https://openalex.org/W4403791575"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,84,212],"diffusion":[3,213],"models":[4,14,112,164],"(DMs)":[5],"have":[6],"been":[7],"propelled":[8],"by":[9,67,149],"alignment":[10,61,211],"methods":[11,155],"that":[12,63],"post-train":[13],"to":[15,18,95,127,152,165,195,218],"better":[16],"conform":[17],"human":[19],"preferences.":[20],"However,":[21],"these":[22,54],"approaches":[23],"typically":[24],"require":[25],"computation-intensive":[26],"training":[27,50],"of":[28,122,131,138,160,174,184,200],"a":[29,33,59,100,128,135,180,205],"base":[30],"model":[31,47],"and":[32,49,92,97,120,134,156,191],"reward":[34,65,111],"model,":[35],"which":[36],"not":[37],"only":[38],"incurs":[39],"substantial":[40,145],"computational":[41],"overhead":[42],"but":[43],"may":[44],"also":[45],"compromise":[46],"accuracy":[48],"efficiency.":[51],"To":[52],"address":[53],"limitations,":[55],"we":[56,178],"propose":[57],"Inversion-DPO,":[58],"novel":[60],"framework":[62],"circumvents":[64],"modeling":[66],"reformulating":[68],"Direct":[69],"Preference":[70],"Optimization":[71],"(DPO)":[72],"with":[73,86,187],"DDIM":[74],"inversion":[75,89],"for":[76,109,208],"DMs.":[77],"Our":[78,223],"method":[79],"conducts":[80],"intractable":[81],"posterior":[82],"sampling":[83],"Diffusion-DPO":[85],"the":[87,107,158,161,172,197],"deterministic":[88],"from":[90],"winning":[91],"losing":[93],"samples":[94],"noise":[96],"thus":[98],"derive":[99],"new":[101,206],"post-training":[102,154,173],"paradigm.":[103],"This":[104],"paradigm":[105],"eliminates":[106],"need":[108],"auxiliary":[110],"or":[113],"inaccurate":[114],"appromixation,":[115],"significantly":[116],"enhancing":[117],"both":[118],"precision":[119],"efficiency":[121],"training.":[123],"We":[124],"apply":[125],"Inversion-DPO":[126,150,203],"basic":[129],"task":[130,137],"text-to-image":[132],"generation":[133,221],"challenging":[136],"compositional":[139,198],"image":[140,176],"generation.":[141],"Extensive":[142],"experiments":[143],"show":[144],"performance":[146],"improvements":[147],"achieved":[148],"compared":[151],"existing":[153],"highlight":[157],"ability":[159],"trained":[162],"generative":[163,201],"generate":[166],"high-fidelity":[167],"compositionally":[168],"coherent":[169],"images.":[170],"For":[171],"compostitional":[175],"geneation,":[177],"curate":[179],"paired":[181],"dataset":[182],"consisting":[183],"11,140":[185],"images":[186],"complex":[188,219],"structural":[189],"annotations":[190],"comprehensive":[192],"scores,":[193],"designed":[194],"enhance":[196],"capabilities":[199],"models.":[202],"explores":[204],"avenue":[207],"efficient,":[209],"high-precision":[210],"models,":[214],"advancing":[215],"their":[216],"applicability":[217],"realistic":[220],"tasks.":[222],"code":[224],"is":[225],"available":[226],"at":[227],"https://github.com/MIGHTYEZ/Inversion-DPO":[228]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-25T00:00:00"}
