{"id":"https://openalex.org/W7140383487","doi":"https://doi.org/10.48550/arxiv.2603.23903","title":"Latent Bias Alignment for High-Fidelity Diffusion Inversion in Real-World Image Reconstruction and Manipulation","display_name":"Latent Bias Alignment for High-Fidelity Diffusion Inversion in Real-World Image Reconstruction and Manipulation","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7140383487","doi":"https://doi.org/10.48550/arxiv.2603.23903"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23903","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23903","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23903","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130638885","display_name":"Weiming Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Weiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130716261","display_name":"Qifan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Qifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130661790","display_name":"Siyi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Siyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039193712","display_name":"Yushun Tang","orcid":"https://orcid.org/0000-0002-8350-7637"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yushun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057872214","display_name":"Yijia Wang","orcid":"https://orcid.org/0009-0004-2250-9163"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yijia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101207263","display_name":"Zhihan Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Zhihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130646067","display_name":"Zhihai He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Zhihai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5130638885"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6057999730110168,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6057999730110168,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.10050000250339508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.06030000001192093,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.708899974822998},{"id":"https://openalex.org/keywords/iterative-reconstruction","display_name":"Iterative reconstruction","score":0.4814999997615814},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.46129998564720154},{"id":"https://openalex.org/keywords/diffusion-process","display_name":"Diffusion process","score":0.36570000648498535},{"id":"https://openalex.org/keywords/anisotropic-diffusion","display_name":"Anisotropic diffusion","score":0.3483000099658966},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3361999988555908},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.3352999985218048}],"concepts":[{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.708899974822998},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5879999995231628},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5180000066757202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5091000199317932},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.46129998564720154},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4456999897956848},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.36570000648498535},{"id":"https://openalex.org/C203504353","wikidata":"https://www.wikidata.org/wiki/Q4765461","display_name":"Anisotropic diffusion","level":3,"score":0.3483000099658966},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3361999988555908},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.3352999985218048},{"id":"https://openalex.org/C205372313","wikidata":"https://www.wikidata.org/wiki/Q355645","display_name":"Latent image","level":3,"score":0.335099995136261},{"id":"https://openalex.org/C135252773","wikidata":"https://www.wikidata.org/wiki/Q1567213","display_name":"Inverse problem","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.28139999508857727},{"id":"https://openalex.org/C55128770","wikidata":"https://www.wikidata.org/wiki/Q5275440","display_name":"Diffusion map","level":4,"score":0.267300009727478},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23903","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23903","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23903","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23903","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.6571329236030579,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"research":[1],"has":[2],"shown":[3],"that":[4,186],"text-to-image":[5],"diffusion":[6,38,50,57,88,96,150,198],"models":[7,51],"are":[8],"capable":[9],"of":[10,148,196,205],"generating":[11],"high-quality":[12],"images":[13,28],"guided":[14],"by":[15,156],"text":[16],"prompts.":[17],"But":[18],"can":[19],"they":[20],"be":[21,74],"used":[22],"to":[23,73,122,133,158,174],"generate":[24],"or":[25,66],"approximate":[26,145],"real-world":[27,53],"from":[29,62],"the":[30,37,78,81,87,92,95,100,124,149,160,167,187,192,197,203],"seed":[31],"noise?":[32],"This":[33],"is":[34,120],"known":[35],"as":[36,43,136,166,177,200,202],"inversion":[39,58,82,97,117,127,151],"problem,":[40],"which":[41,119,164],"serves":[42,165],"a":[44,111],"fundamental":[45],"building":[46],"block":[47],"for":[48],"bridging":[49],"and":[52,83,90,99,128,152,211],"scenarios.":[54],"However,":[55],"existing":[56],"methods":[59],"often":[60],"suffer":[61],"low":[63],"reconstruction":[64,154,194],"quality":[65,195],"weak":[67],"robustness.":[68],"Two":[69],"major":[70],"challenges":[71],"need":[72],"carefully":[75],"addressed:":[76],"(1)":[77],"misalignment":[79,125],"between":[80,94,126,170],"generation":[84,129],"trajectories":[85],"during":[86],"process,":[89],"(2)":[91],"mismatch":[93],"process":[98],"VQ":[101],"autoencoder":[102],"(VQAE)":[103],"reconstruction.":[104],"To":[105],"address":[106],"these":[107],"challenges,":[108],"we":[109,142],"introduce":[110],"latent":[112,162],"bias":[113],"vector":[114],"at":[115],"each":[116],"step,":[118],"learned":[121],"reduce":[123],"trajectories.":[130],"We":[131,172],"refer":[132,173],"this":[134,175],"strategy":[135],"Latent":[137,179],"Bias":[138],"Optimization":[139],"(LBO).":[140],"Furthermore,":[141],"perform":[143],"an":[144],"joint":[146],"optimization":[147],"VQAE":[153],"processes":[155],"learning":[157],"adjust":[159],"image":[161,193,209],"representation,":[163],"connecting":[168],"interface":[169],"them.":[171],"technique":[176],"Image":[178],"Boosting":[180],"(ILB).":[181],"Extensive":[182],"experimental":[183],"results":[184],"demonstrate":[185],"proposed":[188],"method":[189],"significantly":[190],"improves":[191],"model,":[199],"well":[201],"performance":[204],"downstream":[206],"tasks,":[207],"including":[208],"editing":[210],"rare":[212],"concept":[213],"generation.":[214]},"counts_by_year":[],"updated_date":"2026-03-27T06:05:27.210665","created_date":"2026-03-27T00:00:00"}
