{"id":"https://openalex.org/W7161832149","doi":"https://doi.org/10.48550/arxiv.2605.19804","title":"Stitched Value Model for Diffusion Alignment","display_name":"Stitched Value Model for Diffusion Alignment","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161832149","doi":"https://doi.org/10.48550/arxiv.2605.19804"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.19804","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19804","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.19804","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136558820","display_name":"Hyojun Go","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Go, Hyojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006604581","display_name":"Hyungjin Chung","orcid":"https://orcid.org/0000-0003-3202-0893"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chung, Hyungjin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029046001","display_name":"Prune Truong","orcid":"https://orcid.org/0000-0002-4916-786X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Truong, Prune","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017438922","display_name":"Goutam Bhat","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhat, Goutam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136531229","display_name":"Li Mi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mi, Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136555207","display_name":"Zhaochong An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Zhaochong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136553234","display_name":"Zixiang Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zixiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086337795","display_name":"Dominik Narnhofer","orcid":"https://orcid.org/0000-0002-9293-4112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Narnhofer, Dominik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136553766","display_name":"Serge Belongie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Belongie, Serge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136586090","display_name":"Federico Tombari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tombari, Federico","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136520883","display_name":"Konrad Schindler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schindler, Konrad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6488000154495239,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6488000154495239,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.06809999793767929,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.025299999862909317,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-stitching","display_name":"Image stitching","score":0.8205000162124634},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5846999883651733},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5821999907493591},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.5730999708175659},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5652999877929688},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5012000203132629},{"id":"https://openalex.org/keywords/markov-chain-monte-carlo","display_name":"Markov chain Monte Carlo","score":0.4722000062465668},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.36320000886917114}],"concepts":[{"id":"https://openalex.org/C29081049","wikidata":"https://www.wikidata.org/wiki/Q1364242","display_name":"Image stitching","level":2,"score":0.8205000162124634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6811000108718872},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5846999883651733},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5821999907493591},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.5730999708175659},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5652999877929688},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5012000203132629},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4984000027179718},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.4722000062465668},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4309000074863434},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.41280001401901245},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.35190001130104065},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.3513000011444092},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2937999963760376},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.29179999232292175},{"id":"https://openalex.org/C52421305","wikidata":"https://www.wikidata.org/wiki/Q1151499","display_name":"Particle filter","level":3,"score":0.289900004863739},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.26429998874664307}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.19804","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19804","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.19804","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19804","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.43510884046554565,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"For":[0],"practical":[1],"use,":[2],"diffusion-":[3],"or":[4,18,51],"flow-based":[5],"generative":[6],"models":[7,120,202],"must":[8],"be":[9,83],"aligned":[10],"with":[11],"task-specific":[12],"rewards,":[13],"such":[14],"as":[15,147],"prompt":[16],"fidelity":[17],"aesthetic":[19],"preference.":[20],"That":[21],"alignment":[22,36],"is":[23,28,180,234],"challenging":[24],"because":[25],"the":[26,35,126,151,154,165,223,226,230],"reward":[27,119,137,162,201],"defined":[29],"for":[30,105,122,229],"clean":[31,123],"output":[32],"images,":[33],"but":[34,66,75,88],"procedure":[37,179],"requires":[38],"value":[39,86,102,224],"function":[40,228],"estimates":[41,63,71],"at":[42],"noisy":[43,106,127,175,232],"intermediate":[44],"latents.":[45,107,176],"Existing":[46],"methods":[47],"resort":[48],"to":[49,95,125,145,173,203],"Tweedie-style":[50],"Monte":[52,69],"Carlo":[53,70],"approximations,":[54],"trading":[55],"off":[56],"estimator":[57],"bias":[58],"against":[59],"computational":[60],"cost:":[61],"Tweedie":[62],"are":[64,72],"efficient":[65],"biased,":[67],"while":[68,266],"more":[73],"accurate":[74],"require":[76],"expensive":[77],"rollouts.":[78],"A":[79],"natural":[80],"alternative":[81],"would":[82],"a":[84,98,112,141,158,209,253],"learned":[85],"function,":[87,225],"it":[89,146,168],"remains":[90],"an":[91,133],"open":[92],"question":[93],"how":[94],"effectively":[96],"train":[97],"strong":[99],"and":[100,139,185,189,237,243,259,271],"general":[101],"model":[103,113,138],"specifically":[104],"Here,":[108],"we":[109],"propose":[110],"StitchVM,":[111],"stitching":[114,178,184],"framework":[115],"that":[116,247],"efficiently":[117],"transfers":[118],"pretrained":[121],"images":[124],"latent":[128,204],"regime.":[129],"StitchVM":[130,206],"starts":[131],"from":[132,164],"existing,":[134],"truncated":[135],"pixel-space":[136,152,200],"attaches":[140],"frozen":[142],"diffusion":[143,166,213],"backbone":[144],"its":[148,170],"head.":[149],"From":[150],"model,":[153],"resulting":[155],"hybrid":[156],"retains":[157],"carefully":[159],"pretrained,":[160],"robust":[161],"capability;":[163],"backbone,":[167],"inherits":[169],"native":[171],"ability":[172],"handle":[174],"The":[177],"exceptionally":[181],"lightweight,":[182],"e.g.,":[183],"finetuning":[186],"CLIP":[187],"ViT-L":[188],"SD":[190],"3.5":[191],"Medium":[192],"takes":[193],"only":[194],"10":[195],"GPU-hours.":[196],"By":[197],"lifting":[198],"powerful":[199],"space,":[205],"opens":[207],"up":[208],"new":[210],"style":[211],"of":[212,216,222,256],"alignment:":[214],"instead":[215],"rough,":[217],"yet":[218],"costly":[219],"per-sample":[220],"approximation":[221],"correct":[227],"actual,":[231],"latents":[233],"constructed":[235],"once":[236],"then":[238],"amortized":[239],"over":[240],"many":[241],"samples":[242],"iterations.":[244],"We":[245],"show":[246],"this":[248],"approach":[249],"yields":[250],"improvements":[251],"across":[252],"broad":[254],"range":[255],"downstream":[257],"steering":[258],"post-training":[260],"methods:":[261],"DPS":[262],"becomes":[263,273],"$3.2\\times$":[264],"faster":[265],"halving":[267],"peak":[268],"GPU":[269],"memory,":[270],"DiffusionNFT":[272],"$2.3\\times$":[274],"faster.":[275]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
