{"id":"https://openalex.org/W7103145302","doi":"https://doi.org/10.48550/arxiv.2510.25818","title":"ScaleDiff: Higher-Resolution Image Synthesis via Efficient and Model-Agnostic Diffusion","display_name":"ScaleDiff: Higher-Resolution Image Synthesis via Efficient and Model-Agnostic Diffusion","publication_year":2025,"publication_date":"2025-10-29","ids":{"openalex":"https://openalex.org/W7103145302","doi":"https://doi.org/10.48550/arxiv.2510.25818"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2510.25818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.25818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2510.25818","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Koh, Sungho","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Koh, Sungho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cha, SeungJu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cha, SeungJu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Oh, Hyunwoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oh, Hyunwoo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lee, Kwanyoung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Kwanyoung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Kim, Dong-Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Dong-Jin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8644000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8644000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.029899999499320984,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.017400000244379044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6384999752044678},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6273999810218811},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5311999917030334},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.41339999437332153},{"id":"https://openalex.org/keywords/anisotropic-diffusion","display_name":"Anisotropic diffusion","score":0.4059999883174896},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.3718999922275543},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.36640000343322754},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.3619999885559082}],"concepts":[{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6384999752044678},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6301000118255615},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6273999810218811},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5311999917030334},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.47609999775886536},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.41339999437332153},{"id":"https://openalex.org/C203504353","wikidata":"https://www.wikidata.org/wiki/Q4765461","display_name":"Anisotropic diffusion","level":3,"score":0.4059999883174896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39640000462532043},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.3718999922275543},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3619999885559082},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2983327147","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Image denoising","level":3,"score":0.2806999981403351}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2510.25818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.25818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2510.25818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.25818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-image":[0],"diffusion":[1,53],"models":[2,54],"often":[3,23],"exhibit":[4],"degraded":[5],"performance":[6,122],"when":[7],"generating":[8],"images":[9],"beyond":[10],"their":[11],"training":[12],"resolution.":[13],"Recent":[14],"training-free":[15,124],"methods":[16,125],"can":[17],"mitigate":[18],"this":[19,36],"limitation,":[20],"but":[21],"they":[22],"require":[24],"substantial":[25],"computation":[26],"or":[27],"are":[28],"incompatible":[29],"with":[30,81],"recent":[31],"Diffusion":[32,139],"Transformer":[33,140],"models.":[34],"In":[35],"paper,":[37],"we":[38,103],"propose":[39],"ScaleDiff,":[40],"a":[41],"model-agnostic":[42],"and":[43,91,132,138],"highly":[44],"efficient":[45,71],"framework":[46,64],"for":[47],"extending":[48],"the":[49,78,112],"resolution":[50],"of":[51,62,128],"pretrained":[52],"without":[55],"any":[56],"additional":[57],"training.":[58],"A":[59],"core":[60],"component":[61],"our":[63],"is":[65],"Neighborhood":[66],"Patch":[67],"Attention":[68],"(NPA),":[69],"an":[70,88],"mechanism":[72],"that":[73,118],"reduces":[74],"computational":[75],"redundancy":[76],"in":[77,126],"self-attention":[79],"layer":[80],"non-overlapping":[82],"patches.":[83],"We":[84],"integrate":[85],"NPA":[86],"into":[87],"SDEdit":[89],"pipeline":[90],"introduce":[92],"Latent":[93],"Frequency":[94],"Mixing":[95],"(LFM)":[96],"to":[97,107],"better":[98],"generate":[99],"fine":[100],"details.":[101],"Furthermore,":[102],"apply":[104],"Structure":[105],"Guidance":[106],"enhance":[108],"global":[109],"structure":[110],"during":[111],"denoising":[113],"process.":[114],"Experimental":[115],"results":[116],"demonstrate":[117],"ScaleDiff":[119],"achieves":[120],"state-of-the-art":[121],"among":[123],"terms":[127],"both":[129,136],"image":[130],"quality":[131],"inference":[133],"speed":[134],"on":[135],"U-Net":[137],"architectures.":[141]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-11-01T00:00:00"}
