{"id":"https://openalex.org/W7158955431","doi":"https://doi.org/10.48550/arxiv.2604.26348","title":"ACPO: Anchor-Constrained Perceptual Optimization for Diffusion Models with No-Reference Quality Guidance","display_name":"ACPO: Anchor-Constrained Perceptual Optimization for Diffusion Models with No-Reference Quality Guidance","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7158955431","doi":"https://doi.org/10.48550/arxiv.2604.26348"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.26348","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26348","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.26348","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134925217","display_name":"Yang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134906763","display_name":"Feifan Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Feifan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125934670","display_name":"Han Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Han","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134923942","display_name":"Weiming Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5134925217"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.010700000450015068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6470999717712402},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5065000057220459},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4740999937057495},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3824000060558319},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.374099999666214},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.34689998626708984},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.3346000015735626},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.3294999897480011}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6470999717712402},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6434999704360962},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5767999887466431},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5065000057220459},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4740999937057495},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3824000060558319},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38199999928474426},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.374099999666214},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.34689998626708984},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3418999910354614},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3346000015735626},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C22033958","wikidata":"https://www.wikidata.org/wiki/Q7167036","display_name":"Perceptual learning","level":3,"score":0.31610000133514404},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.3133000135421753},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3077999949455261},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C203504353","wikidata":"https://www.wikidata.org/wiki/Q4765461","display_name":"Anisotropic diffusion","level":3,"score":0.2824999988079071},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C28063669","wikidata":"https://www.wikidata.org/wiki/Q7167042","display_name":"Perceptual system","level":3,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.26348","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26348","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.26348","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26348","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4931199848651886,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"have":[2],"achieved":[3],"remarkable":[4],"success":[5],"in":[6,32,135],"image":[7,73],"generation,":[8],"yet":[9],"their":[10],"training":[11,88,178],"is":[12,61],"predominantly":[13],"driven":[14],"by":[15,71],"full-reference":[16],"objectives":[17],"that":[18,62,105,127,166],"enforce":[19],"pixel-wise":[20],"similarity":[21],"to":[22,87],"ground-truth":[23],"images.Such":[24],"supervision,":[25],"while":[26,122,173],"effective":[27],"for":[28,187],"fidelity,":[29,149],"may":[30],"insufficient":[31],"terms":[33,136],"of":[34,50,137,183],"subjective":[35],"visual":[36],"perception":[37],"quality":[38,54,74,145,172],"and":[39,90,147,177],"text-image":[40],"semantic":[41],"consistency.":[42],"In":[43],"this":[44,97],"work,":[45],"we":[46,99,111],"investigate":[47],"the":[48,82,131,159,181],"problem":[49],"incorporating":[51],"no-reference":[52,72],"perceptual":[53,65,108,119,144,171,185],"into":[55],"diffusion":[56,84,133,188],"training.":[57],"A":[58],"key":[59],"challenge":[60],"directly":[63],"optimizing":[64],"signals,":[66],"such":[67],"as":[68,117],"those":[69],"provided":[70],"assessment":[75],"(NR-IQA)":[76],"models,":[77],"introduces":[78],"a":[79,113,118],"mismatch":[80],"with":[81,130],"original":[83,160],"objective,":[85],"leading":[86],"instability":[89],"distributional":[91],"drift":[92],"during":[93],"fine-tuning.":[94],"To":[95],"address":[96],"issue,":[98],"propose":[100],"an":[101,124],"anchor-constrained":[102,184],"optimization":[103,186],"framework":[104],"enables":[106],"stable":[107],"adaptation.":[109],"Specifically,":[110],"leverage":[112],"learned":[114],"NR-IQA":[115],"model":[116,134],"guidance":[120],"signal,":[121],"introducing":[123],"anchor-based":[125],"regularization":[126],"enforces":[128],"consistency":[129],"base":[132],"noise":[138],"prediction.":[139],"This":[140],"design":[141],"effectively":[142],"balances":[143],"improvement":[146],"generative":[148,161],"allowing":[150],"controlled":[151],"adaptation":[152],"toward":[153],"perceptually":[154],"favorable":[155],"outputs":[156],"without":[157],"compromising":[158],"behavior.":[162],"Extensive":[163],"experiments":[164],"demonstrate":[165],"our":[167],"method":[168],"consistently":[169],"enhances":[170],"preserving":[174],"generation":[175],"diversity":[176],"stability,":[179],"highlighting":[180],"effectiveness":[182],"models.":[189]},"counts_by_year":[],"updated_date":"2026-05-01T06:10:29.291645","created_date":"2026-05-01T00:00:00"}
